
## Replication script for the paper "Persecuted Minorities and Defensive
## Cooperation: Contributions to Public Goods by Hindus and Muslims in Delhi
## Slums"

## Change the working directory below to the replication folder on your machine
## SCRIPT WILL NOT RUN WITHOUT EDITING THIS LINE
my_dir <- file.path("~/Dropbox/CR-PublicGoods/replication_cps")

## Other sub-directories used - create them if they are not there
if (!dir.exists(file.path(my_dir, "figs"))) {
    dir.create(file.path(my_dir, "figs"))
}
if (!dir.exists(file.path(my_dir, "tabs"))) {
    dir.create(file.path(my_dir, "tabs"))
}
figs_dir <- file.path(my_dir, "figs")
tabs_dir <- file.path(my_dir, "tabs")

## Load required libraries
library(psych)
library(tidyverse)
library(data.table)
library(ggplot2)
library(scales)
library(lubridate)
library(stringr)
library(readxl)
library(stargazer)
library(interplot)
library(AER)
library(RItools)
library(sp)
library(geosphere)
library(rgeos)
library(cowplot)
library(interflex)

## Set ggplot options
theme_new <- theme_set(theme_bw(base_family = "serif", base_size = 10))
theme_new <- theme_update(
    axis.title.x = element_text(vjust = -0.5),
    axis.title.y = element_text(vjust = 1.25, angle = 90)
)

## R Session Info, to help with troubleshooting if you run into issues
## > sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4

## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

## time zone: America/Denver
## tzcode source: internal

## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base

## other attached packages:
##  [1] interflex_1.2.6   cowplot_1.1.1     rgeos_0.6-2       geosphere_1.5-18
##  [5] sp_1.6-0          RItools_0.3-3     AER_1.2-10        survival_3.5-5
##  [9] sandwich_3.0-2    lmtest_0.9-40     zoo_1.8-12        car_3.1-2
## [13] carData_3.0-5     interplot_0.2.3   arm_1.13-1        lme4_1.1-33
## [17] Matrix_1.5-4      MASS_7.3-60       abind_1.4-5       stargazer_5.2.3
## [21] readxl_1.4.2      scales_1.2.1      data.table_1.14.8 lubridate_1.9.2
## [25] forcats_1.0.0     stringr_1.5.0     dplyr_1.1.2       purrr_1.0.1
## [29] readr_2.1.4       tidyr_1.3.0       tibble_3.2.1      ggplot2_3.4.2
## [33] tidyverse_2.0.0   psych_2.3.3

## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0     pcse_1.9.1.1         pROC_1.18.2
##  [4] digest_0.6.31        timechange_0.2.0     lifecycle_1.0.3
##  [7] magrittr_2.0.3       compiler_4.3.0       rlang_1.1.1
## [10] tools_4.3.0          utf8_1.2.3           mnormt_2.1.1
## [13] plyr_1.8.8           RColorBrewer_1.1-3   withr_2.5.0
## [16] Lmoments_1.3-1       grid_4.3.0           fansi_1.0.4
## [19] lfe_2.9-0            xtable_1.8-4         colorspace_2.1-0
## [22] future_1.32.0        globals_0.16.2       iterators_1.0.14
## [25] cli_3.6.1            mvtnorm_1.1-3        generics_0.1.3
## [28] tzdb_0.4.0           minqa_1.2.5          splines_4.3.0
## [31] parallel_4.3.0       ggplotify_0.1.0      cellranger_1.1.0
## [34] vctrs_0.6.2          yulab.utils_0.0.6    boot_1.3-28.1
## [37] interactionTest_1.2  SparseM_1.81         gridGraphics_0.5-1
## [40] hms_1.1.3            svd_0.5.4            Formula_1.2-5
## [43] listenv_0.9.0        foreach_1.5.2        glue_1.6.2
## [46] parallelly_1.35.0    nloptr_2.0.3         codetools_0.2-19
## [49] stringi_1.7.12       gtable_0.3.3         munsell_0.5.0
## [52] pillar_1.9.0         R6_2.5.1             doParallel_1.0.17
## [55] lattice_0.21-8       Rcpp_1.0.10          coda_0.19-4
## [58] gridExtra_2.3        nlme_3.1-162         mgcv_1.8-42
## [61] ModelMetrics_1.2.2.2 pkgconfig_2.0.3

################################################################################
#### Code run to prepare dataset
################################################################################

## In order to not include identifiable information on respondents, we clean and
## de-identify our data before reading it in for the analyses in our
## paper. We include the code, commented out, that we used to clean our
## data. Afterward, we begin by reading in the cleaned version of the data.

## ## Reading function that takes out the second and third rows
## read_csv <- function(my_file) {
##     headers <- read.csv(my_file, header = FALSE, nrows = 1, as.is = TRUE,
##                         stringsAsFactors = FALSE)
##     headers <- gsub(" +", "", headers)
##     headers <- gsub("\\(|\\)", "", headers)
##     my_data <- read.csv(my_file, header = FALSE, skip = 3,
##                         stringsAsFactors = FALSE)
##     colnames(my_data) <- headers
##     return(my_data)
## }

## ## Standardization function
## range01 <- function(x, mymin, mymax) {
##     (x - mymin) / (mymax - mymin)
## }

## ## Reverse code function
## revcode <- function(x, mymin, mymax) {
##     mymax + mymin - x
## }

## ## Read in data
## mydat <- as.data.table(read_csv(file.path(my_dir, "MainSurvey-2018-06-27.csv")))
## question_vars <- readLines(file.path(my_dir, "question_vars.txt"))
## vars_na_preclean <- readLines(file.path(my_dir, "vars_NA_preclean.txt"))

## ## Getting date and formatting the datetime variables
## tzqual <- "America/New_York"
## tzreal <- "Asia/Calcutta"
## mydat[, StartDate := with_tz(as.POSIXct(StartDate, tz = tzqual), tzreal)]
## mydat[, EndDate := with_tz(as.POSIXct(EndDate, tz = tzqual), tzreal)]
## mydat[, RecordedDate := with_tz(as.POSIXct(RecordedDate, tz = tzqual), tzreal)]
## mydat[, date := as.Date(StartDate)]
## mydat[, duration := difftime(EndDate, StartDate, units = "secs")]
## rm(tzqual, tzreal)

## ## Cutting out the results from the pilot
## mydat <- mydat[date > "2018-05-31"]
## nrow(mydat) #for later in the appendix, reference this number there - 5814

## ## A few of the dates were recorded incorrectly - they were recorded as having
## ## occurred in July, when in fact they occurred in June (and the date they were
## ## recorded/uploaded indicates that) - below, I correct these dates
## myind <- mydat$date %in% as.Date(c("2018-07-02", "2018-07-03"))
## changedates01 <- mydat[myind]$StartDate
## changedates02 <- mydat[myind]$EndDate
## changedates03 <- mydat[myind]$date
## month(changedates01) <- 6
## month(changedates02) <- 6
## month(changedates03) <- 6
## mydat[myind, StartDate := changedates01]
## mydat[myind, EndDate := changedates02]
## mydat[myind, date := changedates03]
## rm(myind, changedates01, changedates02, changedates03)

## ## Get treatment assignment
## my_vars <- grep("cond[0-9][a-b]_ClickCount", names(mydat), value = TRUE)
## treat <- mydat[, ..my_vars]
## treat <- as.data.frame(!is.na(treat))
## for (i in names(treat)) {
##     treat[, i] <- ifelse(treat[, i], gsub(".+([0-9]).+", "\\1", i), 0)
## }
## treat <- apply(treat, 2, as.numeric)
## treat <- rowSums(treat)
## treat[treat == 0] <- NA
## mydat[, treat := as.factor(treat)]
## mydat[, talt01 := NA_character_]
## mydat[treat %in% 1:2, talt01 := "c_comb"]
## mydat[treat %in% 4:5, talt01 := "h_comb"]
## mydat[treat %in% 7:8, talt01 := "v_comb"]
## mydat[treat %in% 3, talt01 := "c_iu"]
## mydat[treat %in% 6, talt01 := "h_iu"]
## mydat[treat %in% 9, talt01 := "v_iu"]
## newlevels <- c("c_comb", "c_iu", "h_comb", "h_iu", "v_comb", "v_iu")
## mydat[, talt01 := factor(talt01, levels = newlevels)]

## ## Numbers to reference later on in the flowchart
## with(mydat, table(treat)) #to reference later on
## nrow(mydat[treat == 1])   #668
## table(mydat[treat %in% c(3, 4, 6, 7, 9)]$treat) #635, 649, 615, 630, 649
## nrow(mydat[treat %in% c(2, 5, 8)])              #1954

## ## The way these map to the new treatments I create later on:
## ## old 1 = new 1
## ## old 4 = new 2
## ## old 7 = new 3
## ## old 3 = new 4
## ## old 6 = new 5
## ## old 9 = new 6

## ## NAs and Corrections
## ## for explanations of these decisions, see the text file
## ## "NA_and_Corrections_Notes.txt"
## mydat[enumerator == "", enumerator := NA]
## mydat[hh_children_1_TEXT == 34, hh_children_1_TEXT := 3]
## mydat[hh_members_1_TEXT %% 1 != 0,
##       hh_members_1_TEXT := hh_adult_1_TEXT + hh_children_1_TEXT]
## mydat[hh_children_1_TEXT %% 1 != 0,
##       hh_children_1_TEXT := hh_members_1_TEXT - hh_adult_1_TEXT]
## mydat[hh_adult_1_TEXT < 0, hh_adult_1_TEXT := hh_adult_1_TEXT * -1]
## mydat[, hh_members_1_TEXT := as.integer(hh_members_1_TEXT)]
## mydat[, hh_adult_1_TEXT := as.integer(hh_adult_1_TEXT)]
## mydat[, hh_children_1_TEXT := as.integer(hh_children_1_TEXT)]
## mydat[(hh_members_1_TEXT > (hh_adult_1_TEXT + hh_children_1_TEXT)) &
##       !(hh_adult_1_TEXT == 0 & hh_children_1_TEXT == 0),
##       hh_members_1_TEXT := hh_adult_1_TEXT + hh_children_1_TEXT]
## mydat[hh_members_1_TEXT != 0 & hh_adult_1_TEXT == 0 & hh_children_1_TEXT == 0,
##       hh_adult_1_TEXT := hh_members_1_TEXT]
## mydat[hh_adult_1_TEXT > hh_members_1_TEXT,
##       hh_adult_1_TEXT := hh_members_1_TEXT - hh_children_1_TEXT]
## mydat[hh_children_1_TEXT > hh_members_1_TEXT,
##       hh_members_1_TEXT := hh_adult_1_TEXT + hh_children_1_TEXT] #rethink
## mydat[hh_members_1_TEXT != (hh_adult_1_TEXT + hh_children_1_TEXT),
##       hh_members_1_TEXT := hh_adult_1_TEXT + hh_children_1_TEXT]
## mydat[age_1_TEXT < 18, age_1_TEXT := NA]
## mydat[res_yrs_1_TEXT == 199999, res_yrs_1_TEXT := 1999]
## mydat[res_yrs_1_TEXT == 155, res_yrs_1_TEXT := 15]
## mydat[res_yrs_1_TEXT == 200, res_yrs_1_TEXT := 20]
## mydat[res_yrs_1_TEXT == 888, res_yrs_1_TEXT := 8]
## mydat[res_yrs_1_TEXT > 1000, res_yrs_1_TEXT := 2018 - res_yrs_1_TEXT]
## mydat[res_yrs_delhi_1_TEXT > 1000,
##       res_yrs_delhi_1_TEXT := 2018 - res_yrs_delhi_1_TEXT]
## mydat[res_yrs_delhi_1_TEXT == 226, res_yrs_delhi_1_TEXT := 26]
## mydat[res_yrs_delhi_1_TEXT == 0.2, res_yrs_delhi_1_TEXT := 20]
## mydat[neighborhood == 99, neighborhood := 999]
## mydat[neighborhood == 4, neighborhood := 3] #evaluate this later
## mydat[figrel_pradahn == 999, figrel_pradahn := 4]
## mydat[figrel_gov == 999, figrel_gov := 3]
## mydat[figrel_party == 999, figrel_party := 3]
## mydat[, pradhan_influence_orig := pradhan_influence]
## mydat[pradhan_influence == 999, pradhan_influence := 1]
## involve_local <- grep("involve_local_[1-5]$", names(mydat), value = TRUE)
## mydat[rowSums(mydat[, ..involve_local], na.rm = TRUE) > 0 & involve_local_6 == 1,
##       involve_local_6 := NA]
## myvars <- grep("involve_local_", names(mydat), value = TRUE)
## myind <- rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars])
## for (col in myvars) {
##     set(mydat, i = which(!myind & is.na(mydat[[col]])), j = col, value = 0)
## }
## tol_neighbors <- grep("tol_neighbors_[1-4]$", names(mydat), value = TRUE)
## mydat[rowSums(mydat[, ..tol_neighbors], na.rm = TRUE) > 0 & tol_neighbors_5 == 1,
##       tol_neighbors_5 := NA]
## myvars <- grep("tol_neighbors_", names(mydat), value = TRUE)
## myind <- rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars])
## for (col in myvars) {
##     set(mydat, i = which(!myind & is.na(mydat[[col]])), j = col, value = 0)
## }
## mydat[drain_prob == 997, drain_prob := 0]
## myvars <- grep("drain_action_[0-9][0]{0,}$", names(mydat), value = TRUE)
## mydat[rowSums(mydat[, ..myvars], na.rm = TRUE) > 0 & drain_action_11 == 1,
##       drain_action_11 := NA]
## myvars <- grep("drain_action_[0-9]+$", names(mydat), value = TRUE)
## myind <- rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars])
## for (col in myvars) {
##     set(mydat, i = which(!myind & is.na(mydat[[col]])), j = col, value = 0)
## }
## mydat[num_floors_1_TEXT == 0.1, num_floors_1_TEXT := 1]
## mydat[num_floors_1_TEXT == 10, num_floors_1_TEXT := 1]
## mydat[num_floors_1_TEXT == 22, num_floors_1_TEXT := 2]
## mydat[num_floors_1_TEXT == 32, num_floors_1_TEXT := 3]
## mydat[num_floors_1_TEXT == 128, num_floors_1_TEXT := 1]
## mydat[, num_floors_1_TEXT := num_floors_1_TEXT + 1]
## for (col in vars_na_preclean) {
##     set(mydat, i = which(mydat[[col]] %in% 997:999), j = col, value = NA)
## }
## mydat[rooms_1_TEXT == 0.1, rooms_1_TEXT := 1]
## mydat[rooms_1_TEXT == 0.2, rooms_1_TEXT := 2]
## mydat[rooms_1_TEXT == 36, rooms_1_TEXT := 12]

## ## Construct variables to be used in analysis
## mydat[, own := res_type]
## mydat[own %in% 2:3, own := 0]
## mydat[, moving := res_move]
## mydat[moving %in% 1:2, moving := 1]
## mydat[, musVhindu := rel_id]
## mydat[!(musVhindu %in% 1:2), musVhindu := NA]
## mydat[, musVhindu := musVhindu - 1]
## ## Put in caste stuff here
## myvars <- grep("relact_", names(mydat), value = TRUE)
## mydat[, relact_ave := rowMeans(.SD, na.rm = TRUE), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       relact_ave := NA]
## myvars <- grep("poleff_", names(mydat), value = TRUE)
## myvars <- myvars[-which(myvars == "poleff_pradhan")]
## mydat[, poleff_ave := rowMeans(.SD, na.rm = TRUE), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       poleff_ave := NA]
## figrel <- grep("figrel_", names(mydat), value = TRUE)
## figrel <- mydat[, ..figrel]
## figrel <- figrel[, lapply(.SD, plyr::mapvalues, from = 1:4, to = c(1, 1, 1, 0))]
## figrel_var <- rowSums(figrel, na.rm = TRUE)
## figrel_var[rowSums(is.na(figrel)) == 7] <- NA
## mydat[, figrel_sum := figrel_var]
## involve_local <- grep("involve_local_[1-5]$", names(mydat), value = TRUE)
## involve_local <- mydat[, ..involve_local]
## involve_local_var <- rowSums(involve_local, na.rm = TRUE)
## involve_local_var[mydat$involve_local_997 == 1 | mydat$involve_local_998 == 1] <- NA
## involve_local <- grep("involve_local_", names(mydat), value = TRUE)
## involve_local_var[rowSums(is.na(mydat[, ..involve_local])) == 8] <- NA
## mydat[, involve_local_sum := involve_local_var]
## tol_neighbors <- grep("tol_neighbors_[1-4]$", names(mydat), value = TRUE)
## tol_neighbors <- mydat[, ..tol_neighbors]
## tol_neighbors_var <- rowSums(tol_neighbors, na.rm = TRUE)
## tol_neighbors_var[mydat$tol_neighbors_997 == 1 | mydat$tol_neighbors_998 == 1] <- NA
## mydat[, tol_neighbors_sum := tol_neighbors_var]
## myvarsall <- grep("socties", names(mydat), value = TRUE)
## myvars <- grep("socties3_", names(mydat), value = TRUE)
## mydat[, socties3_ave := rowMeans(.SD, na.rm = TRUE), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       socties3_ave := NA]
## myvars <- grep("socties4_", names(mydat), value = TRUE)
## mydat[, socties4_ave := rowMeans(.SD, na.rm = TRUE), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       socties4_ave := NA]
## myvars <- grep("polact_2015_", names(mydat), value = TRUE)
## mydat[, polact_2015_sum := rowSums(.SD, na.rm = TRUE), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       polact_2015_sum := NA]
## myvars <- grep("diff_", names(mydat), value = TRUE)
## mydat[, diff_ave := rowMeans(.SD, na.rm = TRUE), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       diff_ave := NA]
## myvars <- grep("assets_", names(mydat), value = TRUE)
## mydat[, assets_perc := rowMeans(.SD, na.rm = TRUE) / ncol(.SD), .SDcols = myvars]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       assets_perc := NA]
## ## Another asset measure
## mydat[, assets_3level := 1]
## mydat[assets_bike == 1 | assets_cooler == 1 | assets_fridge == 1,
##       assets_3level := 2]
## mydat[assets_2wheel == 1 | assets_3wheel == 1 | assets_4wheel == 1 | assets_comp == 1 | assets_ac == 1,
##       assets_3level := 3]
## mydat[rowSums(is.na(mydat[, ..myvars])) == ncol(mydat[, ..myvars]),
##       assets_3level := NA]
## mydat[, assets_mid := 0]
## mydat[assets_3level == 2, assets_mid := 1]
## mydat[is.na(assets_3level), assets_mid := NA]
## mydat[, assets_high := 0]
## mydat[assets_3level == 3, assets_high := 1]
## mydat[is.na(assets_3level), assets_high := NA]
## mydat[, ppl_per_room := hh_members_1_TEXT / rooms_1_TEXT]
## mydat[rooms_1_TEXT == 0, ppl_per_room := NA]

## ## Religious variables edits
## mydat[, figrel_religious_d := as.integer(figrel_religious < 4)]
## mydat[, relind_private := relact_prayer]
## myvars <- c("relact_visit", "relact_services", "relact_donations", "relact_fasts")
## mydat[, relind_public := psych::alpha(mydat[, ..myvars])$scores]
## psych::alpha(mydat[, ..myvars])        #Alpha value
## myvars <- c("socties4_religion", "socties4_comunity")
## mydat[, relind_socties := psych::alpha(mydat[, ..myvars])$scores]
## psych::alpha(mydat[, ..myvars])        #Alpha value
## ## I just want to check the alpha with the two dichotomous variables in there
## myvars <- c("socties4_religion", "socties4_comunity", "figrel_religious_d",
##             "involve_local_4")
## psych::alpha(mydat[, ..myvars])
## ## More variables
## mydat[, perc_married := plyr::mapvalues(married, 1:3, c(1, 0, 0))]
## mydat[, relact_1 := relact_prayer]
## mydat[, relact_2 := rowMeans(.SD, na.rm = TRUE),
##       .SDcols = c("relact_visit", "relact_services", "relact_donations",
##                   "relact_fasts")]
## mydat[, poleff_local := rowMeans(.SD, na.rm = TRUE),
##       .SDcols = c("poleff_mcd", "poleff_pradhan", "pradhan_influence")]
## mydat[, figrel_any := as.integer(figrel_sum > 0)]
## mydat[, involve_local_any := as.integer(involve_local_sum > 0)]
## soctiesA <- c("socties1", "socties2")
## mydat[, socties1 := range01(socties1, 1, 3)]
## mydat[, socties2 := range01(socties2, 1, 5)]
## soctiesB <- c("socties3_childcare", "socties3_finances", "socties3_govdocs",
##               "socties3_disputes", "socties3_clean")
## soctiesC <- c("socties4_family", "socties4_extended")
## soctiesD <- c("socties4_neighbor", "socties4_distantrel", "socties4_caste",
##               "socties4_comunity", "socties4_village", "socties4_coworker",
##               "socties4_religion")
## mydat[, soctiesA := rowMeans(.SD, na.rm = TRUE), .SDcols = soctiesA]
## mydat[, soctiesB := rowMeans(.SD, na.rm = TRUE), .SDcols = soctiesB]
## mydat[, soctiesC := rowMeans(.SD, na.rm = TRUE), .SDcols = soctiesC]
## mydat[, soctiesD := rowMeans(.SD, na.rm = TRUE), .SDcols = soctiesD]
## mydat[, polact := range01(polact_2015_sum, 0, 6)]
## mydat[is.na(polact), polact := 0]
## myvars <- c("diff_housing", "diff_loan", "diff_sell", "savings")
## mydat[, savings := revcode(savings, 1, 3)]
## mydat[, savings := range01(savings, 1, 3)]
## mydat[, finhard := rowMeans(.SD, na.rm = TRUE), .SDcols = myvars]
## assets02 <- c("assets_lpgsmall", "assets_lpglarge", "assets_bike",
##               "assets_fan")
## assets03 <- c("assets_cooler", "assets_2wheel", "assets_tv", "assets_fridge")
## assets04 <- c("assets_comp", "assets_4wheel", "assets_ac")
## ind02 <- rowSums(mydat[, ..assets02], na.rm = TRUE) > 0
## ind03 <- rowSums(mydat[, ..assets03], na.rm = TRUE) > 0
## ind04 <- rowSums(mydat[, ..assets04], na.rm = TRUE) > 0
## mydat[, assets_groups := NA_integer_]
## mydat[ind02 & !ind03 & !ind04, assets_groups := 1]
## mydat[ind03 & !ind04, assets_groups := 2]
## mydat[ind04, assets_groups := 3]
## mydat[, own := (res_type - 2) * -1]

## ## Get rid of people for whom the location is a relative's house
## mydat <- mydat[res_type != 3 | is.na(res_type)]
## nrow(mydat) #5,812, so two removed

## ## This is the code to extract the geocodes from the comments and generate some
## ## variables indicating how good a geocoordinate is
## myvars <- c("ResponseId", "StartDate", "enumerator", "DeviceIdentifier",
##             "settlement", "LocationLatitude", "LocationLongitude", "notes")
## geodat <- mydat[, ..myvars]
## geodat[, ind := 1:.N]
## setnames(geodat, c("LocationLatitude", "LocationLongitude"), c("qlat", "qlong"))
## mycoords <- str_extract(geodat$notes, "[0-9]{1,2}\\.[0-9]{1,} *, *[0-9]{1,2}\\.[0-9]{1,}")
## mycoords <- str_replace_all(mycoords, " ", "")
## mycoords <- str_split(mycoords, ",")
## geodat[, plat := as.numeric(sapply(mycoords, "[", 1))]
## geodat[, plong := as.numeric(sapply(mycoords, "[", 2))]
## geodat[, lat := plat]
## geodat[, long := plong]
## geodat[, geotype := "p"]
## geodat[is.na(lat), geotype := "q"]
## geodat[is.na(lat), lat := qlat]
## geodat[is.na(long), long := qlong]
## geodat[, qrep := paste0(qlat, ",", qlong)]
## qcoords <- table(geodat$qrep)
## geodat[, qrep := as.integer(plyr::mapvalues(qrep, names(qcoords), qcoords))]
## geodat[, prep := paste0(plat, ",", plong)]
## pcoords <- table(geodat$prep)
## pcoords[names(pcoords) == "NA,NA"] <- NA
## geodat[, prep := as.integer(plyr::mapvalues(prep, names(pcoords), pcoords))]
## geodat[, qout := as.integer(qlat < 28.6 | qlat > 28.73 | qlong < 77.12 | qlong > 77.35)]
## geodat[, pout := as.integer(plat < 28.6 | plat > 28.73 | plong < 77.12 | plong > 77.35)]
## geodat[, altered := 0]
## setcolorder(geodat, c("ind", "ResponseId", "StartDate", "enumerator",
##                       "DeviceIdentifier", "settlement", "qlat", "qlong",
##                       "qrep", "qout", "plat", "plong", "prep", "pout",
##                       "lat", "long", "geotype", "altered", "notes"))
## setorder(geodat, ind)
## ## write.csv(geodat, file = "geocoordinates.csv", row.names = FALSE)

## ## This is the code used to initially parse out the geocoordinates, although
## ## there was a lot of work done manually after the fact
## ## mycoords <- str_extract(mydat$notes, "[0-9]{1,2}\\.[0-9]{1,} *, *[0-9]{1,2}\\.[0-9]{1,}")
## ## mycoords <- str_replace_all(mycoords, " ", "")
## ## mycoords <- str_split(mycoords, ",")
## ## mydat[, pinlat := sapply(mycoords, "[", 1)]
## ## mydat[, pinlong := sapply(mycoords, "[", 2)]
## ## mydat[, lat := pinlat]
## ## mydat[, long := pinlong]
## ## write.csv(mydat[, c("ResponseId", "StartDate", "enumerator", "DeviceIdentifier",
## ##                     "settlement", "LocationLatitude", "LocationLongitude",
## ##                     "notes", "pinlat", "pinlong", "lat", "long")],
## ##           file = "geocoordinates.csv",
## ##           row.names = FALSE)
## ## myenums <- sort(unique(mydat$enumerator))
## ## write.csv(myenums, file = "enumerators.csv", row.names = FALSE)

## ## Read in the manually edited geocoordinates
## mydcoord <- as.data.table(read_excel(file.path(my_dir, "geocoordinates.xlsx"), na = "NA"))
## mydat <- merge(mydat, mydcoord[, c("ResponseId", "lat", "long", "geonotes")])

## ## Get the radii
## mygeo <- copy(mydat)
## mygeo <- mygeo[!(is.na(lat) | is.na(long))]
## coordinates(mygeo) <- ~long+lat
## proj4string(mygeo) <- CRS("+init=epsg:4326")
## mydist <- distm(coordinates(mygeo))                  #geodesic distance on WGS84 ellipsoid
## my50 <- mydist <= 50
## my100 <- mydist <= 100
## my150 <- mydist <= 150
## mygeo$percmus50m <- NA
## for (i in 1:nrow(mygeo)) {
##     mygeo$percmus50m[i] <- mean(mygeo$musVhindu[my50[, i]], na.rm = TRUE)
## }
## mygeo$percmus50m[apply(my50, 2, sum) < 10] <- NA
## mygeo$percmus100m <- NA
## for (i in 1:nrow(mygeo)) {
##     mygeo$percmus100m[i] <- mean(mygeo$musVhindu[my100[, i]], na.rm = TRUE)
## }
## mygeo$percmus100m[apply(my100, 2, sum) < 10] <- NA
## mygeo$percmus150m <- NA
## for (i in 1:nrow(mygeo)) {
##     mygeo$percmus150m[i] <- mean(mygeo$musVhindu[my150[, i]], na.rm = TRUE)
## }
## mygeo$percmus150m[apply(my150, 2, sum) < 10] <- NA
## mydiv <- function(pmus) {
##     phind <- 1 - pmus
##     ind <- 1 - ((0.5 - pmus) / 0.5)^2 * pmus - ((0.5 - phind) / 0.5)^2 * phind
##     return(ind)
## }
## myfrac <- function(pmus) {
##     phind <- 1 - pmus
##     ind <- 1 - (pmus^2 + phind^2)
##     return(ind)
## }
## mygeo$div50m <- mydiv(mygeo$percmus50m)
## mygeo$div100m <- mydiv(mygeo$percmus100m)
## mygeo$div150m <- mydiv(mygeo$percmus150m)
## ## Do nearest 50 as well
## mygeo$percmus50k <- NA
## for (i in 1:nrow(mygeo)) {
##     mygeo$percmus50k[i] <- mean(mygeo$musVhindu[order(mydist[, i])[1:51]],
##                                 na.rm = TRUE)
## }
## mygeo$div50k <- mydiv(mygeo$percmus50k)

## ## Merge into the main data
## myvars <- c("ResponseId", "percmus50m", "percmus100m", "percmus150m", "div50m",
##             "div100m", "div150m", "percmus50k", "div50k")
## mymerge <- as.data.frame(mygeo[, myvars])
## mymerge$long <- NULL
## mymerge$lat <- NULL
## mydat <- merge(mydat, mymerge, by = "ResponseId", all = TRUE)

## ## Caste data
## caste_dat <- read.csv(file.path(my_dir, "caste_lowerupper.csv"))
## mydat <- merge(mydat, caste_dat, by = "caste_id_1_TEXT", all.x = TRUE)
## mylevels <- c("Lower Caste", "Upper Caste")
## mydat[, caste_levels2 := as.numeric(plyr::mapvalues(caste_levels2, mylevels, c(0, 1)))]

## ## Enumerator data
## enum_dat <- as.data.table(read_excel(file.path(my_dir, "survey_enumerators.xls")))
## enum_dat[, Enumerator := gsub(" ", " ", Enumerator)] #xls has "non-breaking spaces"
## enum_dat[Religion == "NA", Religion := NA]
## enum_dat[EID == "NA", EID := NA]
## mydat[, enum_rel := enumerator]
## mydat[, enum_rel := plyr::mapvalues(enum_rel, enum_dat$Enumerator,
##                                     enum_dat$Religion)]
## mydat[, enum_id := enumerator]
## mydat[, enum_id := plyr::mapvalues(enum_id, enum_dat$Enumerator,
##                                    enum_dat$EID)]
## mydat[, enum_id := as.numeric(enum_id)]
## mydat[, enum_id := plyr::mapvalues(enum_id, sort(unique(enum_id)),
##                                    1:length(sort(unique(enum_id))))]
## mydat[, enum_id := factor(enum_id)]
## mydat[, enum_id := relevel(enum_id, ref = "5")]

## mydat[, enum_rel_num := 0]
## mydat[enum_rel == "Hindu", enum_rel_num := 1]
## mydat[is.na(enum_rel), enum_rel_num := NA]

## ## Standardizing settlement names
## ## Two separate labels in survey for area C6, that is why it is duplicated
## settlements <- data.table(
##     site = c("B", rep("C", 7), "D", rep("E", 4), rep("D", 3), "A"),
##     basti = c("B1", "C1", "C3", "C6", "C6", "C5", "C2", "C4", "D1", "E3", "E4",
##               "E1", "E2", "D2", "D3", "D4", "A1"),
##     households = c(1260, 1550, 1080, 850, 850, 125, 130, 1065, 900, 230, 80,
##                    2400, 140, 400, 400, 300, 1400),
##     number = c(1:17)
## )
## setorder(settlements, basti)
## mydat[, settlement := plyr::mapvalues(settlement, settlements$number,
##                                       paste0(settlements$site, "-", settlements$basti))]
## mydat[, settsite := sapply(str_split(settlement, "-"), `[`, 1)]
## mydat[, settbasti := sapply(str_split(settlement, "-"), `[`, 2)]

## ## Looking just at the treatments of interest
## ## For the results in this paper, we will only use the data for the six
## ## treatments we're interested in, so I'm going to get those fixed now
## subdat <- mydat[treat %in% c(1, 3, 4, 6, 7, 9)]
## subdat[, treat := plyr::mapvalues(treat, 1:9, c(1, NA, 4, 2, NA, 5, 3, NA, 6))]
## subdat[, treat := factor(treat, 1:6)]
## ## 1 = Control
## ## 2 = Horizontal Accountability
## ## 3 = Vertical Accountability
## ## 4 = Black Sheep
## ## 5 = Horizontal Accountability & Black Sheep
## ## 6 = Vertical Accountability & Black Sheep

## ## Note that this differs from the numbers for mydat

## ## Create a treatment variable that treats 2-6 as a single treatment group
## subdat[, treatcomb := plyr::mapvalues(treat, 1:6, c(0, 1, 1, 1, 1, 1))]

## ## For now, exclude the one individual that doesn't have a settlement
## subdat <- subdat[!is.na(settlement)]
## nrow(subdat) #3843, from 3844, make note for table later

## ## Need an index of the outcomes
## out_vars <- c("out_benefit", "out_interest", "out_fee", "out_contract",
##               "out_influence")
## outcome_alphas <- psych::alpha(subdat[, ..out_vars])
## subdat[, out_ind := outcome_alphas$scores]
## outcome_alphas <- psych::alpha(mydat[, ..out_vars])
## mydat[, out_ind := outcome_alphas$scores]

## ## I need inverse probability weights - for this set of treatments, this
## ## should be 1/(5/6) for those in the treatment group and 1/(1/6) for those in
## ## the control group
## subdat[, ipw := NA_real_]
## subdat[treatcomb == 0, ipw := 1 / (1/6)]
## subdat[treatcomb == 1, ipw := 1 / (5/6)]

## ## Save out a de-identified version of the data with just the data we need
## ## First with subdat, the subset of respondents we do most of our analysis with
## to_keep <- c("ResponseId", "date", "duration", "treat", "settlement",
##              "settsite", "settbasti", "female", "age_1_TEXT", "res_yrs_1_TEXT",
##              "res_citizen", "neighborhood", "relact_prayer", "relact_visit",
##              "relact_services", "relact_donations", "relact_fasts", "poleff_pm",
##              "poleff_cm", "poleff_mcd", "poleff_parties", "poleff_police",
##              "poleff_pradhan", "socties4_comunity", "socties4_religion",
##              "adm_service", "drain_prob", "manchk_1", "manchk_2", "out_benefit",
##              "out_interest", "out_fee", "out_contract", "out_influence", "educ",
##              "employed", "diff_loan", "diff_sell", "savings", "rooms_1_TEXT",
##              "pradhan_influence_orig", "own", "moving", "rel_id_muslim",
##              "musVhindu", "poleff_ave", "assets_3level", "assets_mid",
##              "assets_high", "ppl_per_room", "figrel_religious_d",
##              "relind_private", "relind_public", "relind_socties",
##              "perc_married", "relact_1", "relact_2", "poleff_local",
##              "figrel_any", "involve_local_any", "soctiesA", "soctiesB",
##              "soctiesC", "soctiesD", "polact", "finhard", "div50m", "div100m",
##              "div150m", "caste_levels3_rel", "caste_levels2", "enum_rel",
##              "enum_id", "enum_rel_num", "treatcomb", "ipw", "out_ind")
## subdat <- subdat[, ..to_keep]
## setcolorder(subdat, to_keep)
## ## Now with mydat, the dataset that contains all treatment groups
## to_keep <- to_keep[to_keep %in% names(mydat)]
## to_keep <- c(to_keep, grep("timer{0,1}_.*PageSubmit", names(mydat), value = TRUE))
## mydat <- mydat[, ..to_keep]
## setcolorder(mydat, to_keep)
## save(subdat, mydat, file = file.path(my_dir, "main_dat.RData"))

################################################################################
#### Read in data after cleaning
################################################################################

load(file.path(my_dir, "main_dat.RData"))

################################################################################
#### Saved values
################################################################################

## Save some lists of variables that are used for some of the analyses
out_vars <- c("out_benefit", "out_interest", "out_fee", "out_contract",
              "out_influence")
out_labs <- c("Benefit", "Interest", "Fee", "Contract", "Influence")
cont_vars <- c("female", "perc_married", "age_1_TEXT", "res_yrs_1_TEXT", "own",
            "rooms_1_TEXT", "employed", "finhard", "assets_3level", "educ",
            "relact_1", "relact_2", "musVhindu", "poleff_local", "figrel_any",
            "involve_local_any", "soctiesA", "soctiesB", "soctiesC",
            "soctiesD", "polact", "adm_service", "drain_prob", "caste_levels2")
cont_vars_woMus <- cont_vars[!(cont_vars == "musVhindu")]
cont_labs <- c("Gender (1 = Female, 0 = Male)", "Married (1 = Married, 0 = Other)",
               "Age", "Years Residing in Settlement",
               "Ownership (1 = Own Residence, 0 = Other)",
               "Number of Rooms in Residence", "Employment Status (1 = Employed)",
               "Financial Hardship Index (1 = More Hardship)",
               "Assets Index (1-3, 3 = Most Assets)", "Education Level (1-10 Scale)",
               "Practice of Prayer (1-5 Scale)",
               "Religious Behavior Index (1-5 Scale)",
               "Religious ID (1 = Muslim, 0 = Hindu)",
               "Local Political Efficacy Index (1-4 Scale)",
               "Relation to Local Figures (1 = Any)",
               "Involvement in Local Organizations (1 = Any)",
               "General Social Ties (0-1 Scale)", "Helpfulness Index (1-4 Scale)",
               "Forgo Wages Index, Family (1-3 Scale)",
               "Forgo Wages Index, Others (1-3 Scale)",
               "% Political Activities Engaged In", "Quality of Drainage (1-5)",
               "Drainage Problem Requiring Help", "Caste Level (0 = Lower, 1 = Upper)")
cont_labs_woMus <- cont_labs[!(cont_labs == "Religious ID (1 = Muslim, 0 = Hindu)")]
treats <- c("Horizontal\nAccountability", "Vertical\nAccountability",
            "Black\nSheep", "Blk. Shp. +\nHor. Acct.",
            "Blk. Shp. +\nVert. Acct.")

################################################################################
#### Paper Analysis
################################################################################

## Table 1: Treatment Group Sizes
treatlabs_lines <- c("Horizontal\nAccountability", "Vertical\nAccountability",
                     "Black\nSheep", "Blk. Shp. +\nHor. Acct.",
                     "Blk. Shp. +\nVert. Acct.")
treatlabs <- c("Control", "Horizontal Accountability",
               "Vertical Accountability", "Black Sheep",
               "Horizontal Accountability + Black Sheep",
               "Vertical Accountability + Black Sheep")
names(treatlabs) <- 1:6
mytab_out <- subdat[, list(N = .N), by = treat]
setnames(mytab_out, "treat", "Treatment")
setorder(mytab_out, Treatment)
mytab_out <- mytab_out %>% mutate(Treatment = case_when(
                                      Treatment == 1 ~ "Control",
                                      Treatment == 2 ~ "Horizontal Accountability",
                                      Treatment == 3 ~ "Vertical Accountability",
                                      Treatment == 4 ~ "Black Sheep",
                                      Treatment == 5 ~ "Horizontal Accountability + Black Sheep",
                                      Treatment == 6 ~ "Vertical Accountability + Black Sheep"
                                  ))
tab1_out <- stargazer(mytab_out, type = "latex", summary = FALSE, digits = 0, rownames = FALSE,
                      label = "tab:treatment-sizes",
                      title = "Treatment Group Sizes")
tab1_out <- str_replace_all(tab1_out, " cc", " lc")
writeLines(tab1_out, file.path(tabs_dir, "treat_sizes.tex"))

## Table 2: Demographics
cont2_vars <- c(
    "female", "perc_married", "age_1_TEXT", "own", "ppl_per_room", "educ",
    "employed", "assets_mid", "assets_high", "finhard",
    "relind_private", "relind_public", "relind_socties",
    "soctiesA", "soctiesB", "soctiesC", "soctiesD",
    "poleff_ave", "poleff_local", "figrel_any", "polact", "involve_local_any",
    "caste_levels2",
    "div100m",
    "res_yrs_1_TEXT", "res_citizen", "adm_service", "drain_prob", "enum_rel_num")
cont2_labs <- c(
    "Gender (1 = Female, 0 = Male)",
    "Married (1 = Married, 0 = Other)",
    "Age",
    "Home Ownership (1 = Own Residence, 0 = Other)",
    "People Per Room in Residence",
    "Education Level (1-10 Scale)",
    "Employment Status (1 = Employed)",
    "% Mid-Tier Assets (Bike, Cooler, Fridge)",
    "% High-Tier Assets (Vehicle, Computer, AC)",
    "Financial Hardship Index (1 = More Hardship)",
    "Private Religious Practice (1 = Never, 5 = Daily)",
    "Public Religious Practice (4 Items, 1--5 Scale)",
    "Religious Ties Index (2 Items, 1--3 Scale)",
    "General Social Ties (0-1 Scale)",
    "Helpfulness Index (1-4 Scale)",
    "Forgo Wages Index, Family (1-3 Scale)",
    "Forgo Wages Index, Others (1-3 Scale)",
    "General Political Trust Index (6 Items, 1--4 Scale)",
    "Local Political Trust Index (3 Items, 1-4 Scale)",
    "Network in State Institutions (7 Items, 1 = Any)",
    "Political Participation, 2015 (6 Items)",
    "Involvement in Local Organizations (1 = Any)",
    "Caste Level (0 = Lower, 1 = Upper)",
    "Hindu-Muslim Diversity within 100 Meters (0--1 Index)",
    "Years Residing in Settlement",
    "Consider Self Citizen of Delhi (1 = Yes)",
    "Quality of Drainage (1-5)",
    "Drainage Problem Requiring Help (1 = Yes)",
    "% Hindu Enumerator")
demvars <- data.table(
    "Variable" = cont2_labs,
    "Overall" = colMeans(subdat[, ..cont2_vars], na.rm = TRUE),
    "Hindus" = colMeans(subdat[musVhindu == 0, ..cont2_vars], na.rm = TRUE),
    "Muslims" = colMeans(subdat[musVhindu == 1, ..cont2_vars], na.rm = TRUE)
)
tab2_out <- stargazer(demvars, type = "latex", summary = FALSE, digits = 2, rownames = FALSE,
                      label = "tab:demvars",
                      title = "Sample Demographic Characteristics, Overall and by Religious Identification")
tab2_out <- str_replace_all(tab2_out, "cccc", "lccc")
tab2_out <- c(tab2_out[1:11],
              "\\hspace{3mm}Demographics &  &  &  \\\\",
              tab2_out[12:21],
              "\\\\ \\hspace{3mm}Religiosity/Social Ties &  &  &  \\\\",
              tab2_out[22:28],
              "\\\\ \\hspace{3mm}Political Factors &  &  &  \\\\",
              tab2_out[29:33],
              "\\\\ \\hspace{3mm}Caste &  &  &  \\\\",
              tab2_out[34],
              "\\\\ \\hspace{3mm}Diversity &  &  &  \\\\",
              tab2_out[35],
              "\\\\ \\hspace{3mm}Neighborhood and Enumerator &  &  &  \\\\",
              tab2_out[36:43])
writeLines(tab2_out, file.path(tabs_dir, "demvars_musVhindu_paper.tex"))

## Table 3: Main analysis by religion
mod_int <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw, data = subdat)
stargazer(mod_int, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Muslim",
                               "Treatments (Combined) x Muslim"),
          dep.var.caption = "Dependent Variable: Index of Favorability",
          dep.var.labels.include = FALSE,
          omit.table.layout = "n",
          title = "Regression of Favorability toward Drainage Program, by Religion",
          label = "tab:outs-all",
          out = file.path(tabs_dir, "mod_int.tex"))

## Table 4: Regression without interaction and with diversity
mod_null <- lm(out_ind ~ treatcomb, weights = ipw, data = subdat)
mod_div <- lm(out_ind ~ treatcomb * div100m, weights = ipw, data = subdat)
stargazer(mod_null, mod_div, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "div100m", "treatcomb1:div100m"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Diversity",
                               "Treatments (Combined) x Diversity"),
          dep.var.caption = "Dependent Variable: Index of Favorability",
          dep.var.labels.include = FALSE,
          omit.table.layout = "n",
          title = "Regression of Favorability toward Drainage Program, by Diversity",
          label = "tab:outs-null",
          out = file.path(tabs_dir, "mod_null.tex"))

## Footnote on multiple testing
my_pvals <- c(summary(mod_null)$coefficients[2, 4],
              summary(mod_div)$coefficients[c(2, 4), 4],
              summary(mod_int)$coefficients[c(2, 4), 4])
my_sigs_nocorr <- my_pvals < 0.05
table(my_sigs_nocorr)
my_sigs_bh <- p.adjust(my_pvals, "BH") < 0.05
table(my_sigs_bh)
my_sigs_bonf <- p.adjust(my_pvals, "bonferroni") < 0.05
table(my_sigs_bonf)
p.adjust(my_pvals, "BH")
p.adjust(my_pvals, "bonferroni")

## Figure 3: Effects by religion in graph
mod_int <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw, data = subdat)
p_data <- data.frame(
    treatcomb = factor(c(0, 1, 0, 1)),
    musVhindu = c(0, 0, 1, 1)
)
p_data <- cbind(p_data,
                predict(mod_int, newdata = p_data, interval = "confidence"))
mod_int_dt <- as.data.table(p_data)
setnames(mod_int_dt, c("lwr", "upr"), c("lwr95", "upr95"))
mod_int_dt <- cbind(mod_int_dt,
                predict(mod_int, newdata = mod_int_dt[, 1:2],
                        interval = "confidence", level = 0.9)[, 2:3])
setnames(mod_int_dt, c("lwr", "upr"), c("lwr90", "upr90"))
mod_int_dt[, treatcomb := plyr::mapvalues(treatcomb, 0:1, c("Control", "Treatments (Combined)"))]
mod_int_dt[, musVhindu := plyr::mapvalues(musVhindu, 0:1, c("Hindu", "Muslim"))]
## for combining with other
mod_int_dt[, outcome := "Index"]
## Model with Interaction and with Outcomes Separated
mods_int <- lapply(paste0(out_vars, " ~ ", "treatcomb * musVhindu"),
                   function(myform, mydat, mywt) { lm(as.formula(myform), mydat, weights = mywt) },
                   mydat = subdat, mywt = subdat$ipw)
tmp_dat <- data.frame(
    treatcomb = factor(c(0, 1, 0, 1)),
    musVhindu = c(0, 0, 1, 1)
)
mods_int_dt <- lapply(mods_int, function(mymod, new_dat) {
    append_dat1 <- predict(mymod, newdata = new_dat, interval = "confidence")
    colnames(append_dat1) <- c("fit", "lwr95", "upr95")
    append_dat2 <- predict(mymod, newdata = new_dat, interval = "confidence",
                           level = 0.9)
    colnames(append_dat2) <- c("fit", "lwr90", "upr90")
    out_dat <- cbind(new_dat, append_dat1, append_dat2[, 2:3])
    out_dat$outcome <- names(mymod$model)[1]
    return(out_dat)
}, new_dat = tmp_dat)
mods_int_dt <- rbindlist(mods_int_dt)
mods_int_dt[, treatcomb := plyr::mapvalues(treatcomb, 0:1, c("Control", "Treatments (Combined)"))]
mods_int_dt[, musVhindu := plyr::mapvalues(musVhindu, 0:1, c("Hindu", "Muslim"))]
mods_int_dt[, outcome := factor(outcome, levels = out_vars)]
mods_int_dt[, outcome := plyr::mapvalues(outcome, out_vars, out_labs)]
## Have a combined figure for all of the outcomes, with the index at the end
mods_int_dt[, treatcomb := plyr::mapvalues(treatcomb, "Treatments (Combined)",
                                           "Treatments\n(Combined)")]
mods_int_dt[, outcome := plyr::mapvalues(outcome, out_labs,
                                         paste0(1:5, ". ", out_labs))]
mod_int_dt[, treatcomb := plyr::mapvalues(treatcomb, "Treatments (Combined)",
                                           "Treatments\n(Combined)")]
p01 <- ggplot(mods_int_dt, aes(x = treatcomb, y = fit, group = musVhindu)) +
    geom_linerange(aes(ymin = lwr90, ymax = upr90), size = 0.6,
                   position = position_dodge(width = 0.4)) +
    geom_linerange(aes(ymin = lwr95, ymax = upr95), size = 0.3,
                   position = position_dodge(width = 0.4)) +
    geom_point(size = 0.9, position = position_dodge(width = 0.4)) +
    geom_line(aes(linetype = musVhindu),
              position = position_dodge(width = 0.4)) +
    labs(x = "Treatment",
         y = "Means and 90/95% CIs") +
    facet_wrap(~ outcome, nrow = 1) +
    coord_cartesian(ylim = c(2.25, 3.25)) +
    theme(legend.position = "none",
          strip.text.x = element_text(size = 7),
          axis.text.x = element_text(angle = 45, hjust = 1))
p02 <- ggplot(mod_int_dt, aes(x = treatcomb, y = fit, group = musVhindu)) +
    geom_linerange(aes(ymin = lwr90, ymax = upr90), size = 0.6,
                   position = position_dodge(width = 0.4)) +
    geom_linerange(aes(ymin = lwr95, ymax = upr95), size = 0.3,
                   position = position_dodge(width = 0.4)) +
    geom_point(size = 0.9, position = position_dodge(width = 0.4)) +
    geom_line(aes(linetype = musVhindu),
              position = position_dodge(width = 0.4)) +
    labs(x = "Treatment",
         y = "Means and 90/95% CIs",
         linetype = "Participant\nReligious\nIdentity") +
    facet_wrap(~ outcome, nrow = 1) +
    coord_cartesian(ylim = c(2.25, 3.25)) +
    theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
          strip.text.x = element_text(size = 7),
          axis.text.x = element_text(angle = 45, hjust = 1))
pFinal <- plot_grid(p01, p02, align = "h", axis = "tb", nrow = 1, ncol = 2,
                    rel_widths = c(2.13, 1))
ggsave(file.path(figs_dir, "mods_int_windex.png"), pFinal, height = 4,
       width = 6.25)

## Table 5: Models with controls
cont2_vars <- c(
    "female", "perc_married", "age_1_TEXT", "own", "ppl_per_room", "educ",
    "employed", "assets_mid", "assets_high", "finhard",
    "relind_private", "relind_public", "relind_socties",
    "soctiesA", "soctiesB", "soctiesC", "soctiesD",
    "poleff_ave", "poleff_local", "figrel_any", "polact", "involve_local_any",
    "caste_levels2",
    "div100m",
    "res_yrs_1_TEXT", "res_citizen", "adm_service", "drain_prob", "enum_rel_num")
cont2_labs <- c(
    "Gender (1 = Female, 0 = Male)",
    "Married (1 = Married, 0 = Other)",
    "Age",
    "Home Ownership (1 = Own Residence, 0 = Other)",
    "People Per Room in Residence",
    "Education Level (1-10 Scale)",
    "Employment Status (1 = Employed)",
    "Mid-Tier Assets (Bike, Cooler, Fridge)",
    "High-Tier Assets (Vehicle, Computer, AC)",
    "Financial Hardship Index (1 = More Hardship)",
    "Private Religious Practice (1 = Never, 5 = Daily)",
    "Public Religious Practice (4 Items, 1--5 Scale)",
    "Religious Ties Index (2 Items, 1--3 Scale)",
    "General Social Ties (0-1 Scale)",
    "Helpfulness Index (1-4 Scale)",
    "Forgo Wages Index, Family (1-3 Scale)",
    "Forgo Wages Index, Others (1-3 Scale)",
    "General Political Trust Index (6 Items, 1--4 Scale)",
    "Local Political Trust Index (3 Items, 1-4 Scale)",
    "Network in State Institutions (7 Items, 1 = Any)",
    "Political Participation, 2015 (6 Items)",
    "Involvement in Local Organizations (1 = Any)",
    "Caste Level (0 = Lower, 1 = Upper)",
    "Hindu-Muslim Diversity within 100 Meters (0--1 Index)",
    "Years Residing in Settlement",
    "Consider Self Citizen of Delhi (1 = Yes)",
    "Quality of Drainage (1-5)",
    "Drainage Problem Requiring Help (1 = Yes)",
    "Hindu Enumerator")
mod_intwcont_0 <- lm(
    as.formula(paste0("out_ind ~ treatcomb * musVhindu + ",
                      paste0(cont2_vars[1:10], collapse = " + "))),
    weights = ipw, data = subdat)
mod_intwcont_1 <- lm(
    as.formula(paste0("out_ind ~ treatcomb * musVhindu + ",
                      paste0(cont2_vars[1:17], collapse = " + "))),
    weights = ipw, data = subdat)
mod_intwcont_2 <- lm(
    as.formula(paste0("out_ind ~ treatcomb * musVhindu + ",
                      paste0(cont2_vars[1:22], collapse = " + "))),
    weights = ipw, data = subdat)
mod_intwcont_3 <- lm(
    as.formula(paste0("out_ind ~ treatcomb * musVhindu + ",
                      paste0(cont2_vars[1:23], collapse = " + "))),
    weights = ipw, data = subdat)
mod_intwcont_4 <- lm(
    as.formula(paste0("out_ind ~ treatcomb * musVhindu + ",
                      paste0(cont2_vars[1:24], collapse = " + "))),
    weights = ipw, data = subdat)
mod_intwcont_5 <- lm(
    as.formula(paste0("out_ind ~ treatcomb * musVhindu + ",
                      paste0(cont2_vars, collapse = " + "))),
    weights = ipw, data = subdat)
stargazer(mod_int, mod_intwcont_0, mod_intwcont_1, mod_intwcont_2,
          mod_intwcont_3, mod_intwcont_4, mod_intwcont_5,
          type = "latex", digits = 2,
          dep.var.caption = "Dependent Variable: Index of Favorability",
          dep.var.labels.include = FALSE,
          order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu"), "$"),
          omit = paste0("^", cont2_vars, "$"),
          covariate.labels = c("Constant", "Treatments", "Muslim",
                               "Treatments x Muslim"),
          add.lines = list(c("Demographic Controls", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Religiosity/Social Ties", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Political Factors", "No", "No", "No", "Yes", "Yes", "Yes", "Yes"),
                           c("Caste", "No", "No", "No", "No", "Yes", "Yes", "Yes"),
                           c("Diversity", "No", "No", "No", "No", "No", "Yes", "Yes"),
                           c("Neighborhood, Enum.", "No", "No", "No", "No", "No", "No", "Yes")),
          omit.table.layout = "n",
          title = "Models with Controls",
          label = "tab:mods-wcontrols",
          omit.stat = c("ser", "f"),
          out = file.path(tabs_dir, "mods_intwcont_compact.tex"))

## Figure 4: Interaction coefficient by caste and religious ties
## Determine median values and check to make sure they divide sample in a
## useful manner
myvars <- c("relind_private", "relind_public", "relind_socties")
mymedians <- apply(subdat[musVhindu == 1, ..myvars], 2, median, na.rm = TRUE)
## The median for prayer provides no variation, use the value just below the
## median
mymedians[1] <- 4
with(subdat, table(musVhindu, relind_private <= mymedians[1], useNA = "ifany"))
with(subdat, table(musVhindu, relind_public <= mymedians[2], useNA = "ifany"))
with(subdat, table(musVhindu, relind_socties <= mymedians[3], useNA = "ifany"))
## Now estimate models using these as the dividing factor
mod_int_relpriv_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                          data = subdat, subset = relind_private <= mymedians[1])
mod_int_relpriv_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                         data = subdat, subset = relind_private > mymedians[1])
mod_int_relpub_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                         data = subdat, subset = relind_public <= mymedians[2])
mod_int_relpub_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                        data = subdat, subset = relind_public > mymedians[2])
mod_int_relsocties_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                             data = subdat, subset = relind_socties <= mymedians[3])
mod_int_relsocties_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                            data = subdat, subset = relind_socties > mymedians[3])
mod_caste_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                    data = subdat, subset = caste_levels2 == 0)
mod_caste_high <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                     data = subdat, subset = caste_levels2 == 1)
relmods <- list(mod_int_relpriv_low, mod_int_relpriv_hi,
                mod_int_relpub_low, mod_int_relpub_hi,
                mod_int_relsocties_low, mod_int_relsocties_hi,
                mod_caste_low, mod_caste_high)
## Creating dataset for plot
mods_int_rel <- lapply(relmods, function(mymod) {
    myout <- rbind(
        c(coef(mymod)[2], confint(mymod)[2, ], confint(mymod, level = 0.9)[2, ]),
        c(coef(mymod)[4], confint(mymod)[4, ], confint(mymod, level = 0.9)[4, ])
    )
    return(as.data.frame(myout))
})
mods_int_rel <- rbindlist(mods_int_rel)
setnames(mods_int_rel, c("fit", "lwr95", "upr95", "lwr90", "upr90"))
mods_int_rel[, musVhindu := rep(c("Hindu", "Muslim"), 8)]
mods_int_rel[, rel_quant := rep(c("Low", "Low", "High", "High"), 4)]
mods_int_rel[, rel_quant := factor(rel_quant, levels = c("Low", "High"))]
rel_factors_labs <- c("Private Practice\n(Prayer)", "Public Practice",
                      "Religious Ties", "Caste")
mods_int_rel[, rel_factor := rep(rel_factors_labs, each = 4)]
mods_int_rel[, rel_factor := factor(rel_factor, levels = rev(rel_factors_labs))]
## Plot
## Adjusted the plot to only include the final two items
ggplot(mods_int_rel[musVhindu == "Muslim" &
                    rel_factor %in% c("Religious Ties", "Caste")],
       aes(x = rel_quant, y = fit, group = musVhindu)) +
    geom_linerange(aes(ymin = lwr90, ymax = upr90), size = 1.25,
                   position = position_dodge(width = 0.4)) +
    geom_linerange(aes(ymin = lwr95, ymax = upr95), size = 0.6,
                   position = position_dodge(width = 0.4)) +
    geom_point(size = 2, position = position_dodge(width = 0.4)) +
    geom_line(size = 0.6, position = position_dodge(width = 0.4)) +
    labs(x = "Level of Moderating Factor",
         y = "Interaction Estimate for Religion,\nCombined Model, 90/95% CIs") +
    facet_wrap(~ rel_factor, nrow = 1)
ggsave(file.path(figs_dir, "mods_int_byrelind3.png"), height = 3.25, width = 4)

################################################################################
#### Appendix
################################################################################

## Table 4: Response rates by Basti
## Make the below what this depends on and move the de-identification to the
## code above
settlements <- data.table(
    site = c("B", rep("C", 7), "D", rep("E", 4), rep("D", 3), "A"),
    basti = c("B1", "C1", "C3", "C6", "C6", "C5", "C2", "C4", "D1", "E3", "E4",
              "E1", "E2", "D2", "D3", "D4", "A1"),
    households = c(1260, 1550, 1080, 850, 850, 125, 130, 1065, 900, 230, 80,
                   2400, 140, 400, 400, 300, 1400),
    number = c(1:17)
)
setorder(settlements, basti)
out_sett1 <- setorder(settlements[!duplicated(basti)], basti)
out_sett2 <- setorder(mydat[!is.na(treat) & !is.na(settlement), .N, by = settbasti],
                      settbasti)
out_sett <- data.table(
    "Site" = out_sett1$site,
    "Basti" = out_sett2$settbasti,
    "Households" = label_comma(accuracy = 1)(out_sett1$households),
    "Surveyed" = label_comma(accuracy = 1)(out_sett2$N),
    "Closed House/Refused" = label_comma(accuracy = 1)(out_sett1$households - out_sett2$N),
    "Response Rate" = label_percent(accuracy = 1)(out_sett2$N / out_sett1$households)
)
stargazer(out_sett, summary = FALSE,
          title = "Number of Participants Surveyed and Response Rate by \\textit{Basti}",
          label = "tab:rates-by-basti",
          out = file.path(tabs_dir, "response_rate.tex"))

## Flowchart numbers
## Invited number from the recorded number in Table 4
## Removed households number from the recorded number in Table 4
## Never randomized measured in commented out code
## Treatment allocation recorded in commented out code
## 2 not respondent's home recorded in commented out code
## 1 no settlement recorded in subdat code above
with(subdat, table(treatcomb, is.na(caste_levels2))) #9, 34
with(subdat[!is.na(caste_levels2)], table(treatcomb, is.na(div100m))) #2, 20
with(subdat[!is.na(caste_levels2) & !is.na(div100m)],
     table(treatcomb, is.na(out_ind))) #2, 9
with(subdat[!is.na(caste_levels2) & !is.na(div100m) & !is.na(out_ind)],
     table(treatcomb)) #654, 3113
## Note that analysis numbers may vary in other locations

## Table 5: the treatment groups in the experiment as a whole
add_vars <- c("caste_levels3_rel", "div50m", "div150m", "manchk_1", "manchk_2",
              "enum_rel")
my_tlabs <- c("Control", "No Names: Control", "Black Sheep",
              "Horizontal Accountability",
              "No Names: Horizontal Accountability",
              "Horizontal Accountability + Black Sheep",
              "Vertical Accountability",
              "No Names: Vertical Accountability",
              "Vertical Accountability + Black Sheep")
mydat[, treatlabs := plyr::mapvalues(treat, 1:9, my_tlabs)]
mydat[, treatlabs := factor(treatlabs, my_tlabs[c(1, 4, 7, 3, 6, 9, 2, 5, 8)])]
my_vars <- c("treat", "treatlabs", "out_ind", "settlement", "div100m", "musVhindu",
             "caste_levels2")
all_vars <- unique(c(cont_vars, add_vars, out_vars, my_vars))
keep <- rowSums(is.na(mydat[, ..my_vars])) == 0
newfull <- mydat[keep, ..all_vars]
my_out <- newfull[, (outcome = mean(out_ind)), by = list(treatlabs, musVhindu)]
setorder(my_out, treatlabs, musVhindu)
## Now make a table showing the sizes of the treatment groups
ttab <- newfull[, .N, by = treatlabs]
setorder(ttab, treatlabs)
ttab[, "In Paper?" := c(rep("Yes", 6), rep("No", 3))]
setnames(ttab, "treatlabs", "Treatment")
setcolorder(ttab, c("In Paper?", "Treatment", "N"))
ttab_out <- stargazer(ttab, summary = FALSE, rownames = FALSE,
                      title = "Treatment Groups", label = "tab:treatment-sizes")
ttab_out <- str_replace_all(ttab_out, "ccc", "clc")
writeLines(ttab_out, file.path(tabs_dir, "treat_groups.tex"))

## Figure 2 on measures of religion and religiosity
rel_vars <- c("musVhindu", "rel_id_muslim", "relact_prayer",
              "relact_visit", "relact_services", "relact_donations",
              "relact_fasts", "socties4_religion", "socties4_comunity")
rel_labs <- c("Muslim", "Sunni/Shia", "Prayer", "Visit Houses\nof Worship",
              "Attend Services", "Donations", "Fasting",
              "Obligated to Help\nReligious Leader",
              "Obligated to Help\nCo-Religionist")
rel_vars_dt <- subdat[, c("ResponseId", rel_vars), with = FALSE]
rel_vars_dt[, (rel_vars) := lapply(.SD, factor), .SDcols = rel_vars]
rel_vars_dt[, musVhindu := plyr::mapvalues(musVhindu, 0:1, c("No", "Yes"))]
rel_vars_dt[, rel_id_muslim := plyr::mapvalues(rel_id_muslim, 1:2, c("Shia", "Sunni"))]
rel_vars_dt[, relact_prayer := plyr::mapvalues(relact_prayer, 1:5,
                                               c("Never", "During\nDistress",
                                                 "On Festivals",
                                                 "1-2 Times\nPer Week",
                                                 "Daily"))]
rel_vars_dt[, relact_visit := plyr::mapvalues(relact_visit, 1:5,
                                              c("Never", "During\nDistress",
                                                 "On Festivals",
                                                 "1-2 Times\nPer Week",
                                                 "Daily"))]
rel_vars_dt[, relact_services := plyr::mapvalues(relact_services, 1:5,
                                                 c("Never", "During\nDistress",
                                                   "Rarely", "Sometimes",
                                                   "Whenever\nPossible"))]
rel_vars_dt[, relact_donations := plyr::mapvalues(relact_donations, 1:5,
                                                  c("Never", "During\nDistress",
                                                   "Rarely", "Sometimes",
                                                   "Whenever\nPossible"))]
rel_vars_dt[, relact_fasts := plyr::mapvalues(relact_fasts, 1:5,
                                              c("Never", "During\nDistress",
                                                   "Rarely", "Sometimes",
                                                   "Whenever\nPossible"))]
rel_vars_dt[, socties4_religion := plyr::mapvalues(socties4_religion, 1:3,
                                                   c("Not\nObligated",
                                                     "Somewhat\nObligated",
                                                     "Very\nObligated"))]
rel_vars_dt[, socties4_comunity := plyr::mapvalues(socties4_comunity, 1:3,
                                                   c("Not\nObligated",
                                                     "Somewhat\nObligated",
                                                     "Very\nObligated"))]
rel_vars_dt <- melt(rel_vars_dt, id.vars = "ResponseId")
rel_vars_dt[, variable := plyr::mapvalues(variable, rel_vars, rel_labs)]
rel_vars_dt <- rel_vars_dt[!is.na(value)]
tmp_faclabs <- c("No", "Yes", "Sunni", "Shia", "Never", "During\nDistress",
                 "Rarely", "Sometimes", "Whenever\nPossible", "On Festivals",
                 "1-2 Times\nPer Week", "Daily", "Not\nObligated",
                 "Somewhat\nObligated", "Very\nObligated")
rel_vars_dt[, value := factor(value, tmp_faclabs)]
ggplot(rel_vars_dt, aes(x = value)) +
    geom_bar() +
    facet_wrap(~ variable, scales = "free_x", ncol = 3) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    labs(x = "Responses", y = "Number of Respondents")
ggsave(file.path(figs_dir, "rel_vars.png"), width = 6.25, height = 6)

## Table 6 (entered manually, but this code replicates numbers)
subdat[, caste_alt := NA_character_]
subdat[grepl("Lower", caste_levels3_rel), caste_alt := "Lower Caste"]
subdat[grepl("OBC", caste_levels3_rel) & musVhindu == 1,
       caste_alt := "Lower Caste"]
subdat[grepl("OBC", caste_levels3_rel) & musVhindu == 0,
       caste_alt := "OBC"]
subdat[grepl("Upper", caste_levels3_rel), caste_alt := "Upper Caste"]
with(subdat, table(musVhindu, caste_alt, useNA = "ifany"))

## Table 8: Separate outcomes
## Don't save to tex file because requires manual editing to fit - just print
## out here and edit
mods_noint <- list(lm(out_benefit ~ treatcomb, weights = ipw, data = subdat),
                   lm(out_interest ~ treatcomb, weights = ipw, data = subdat),
                   lm(out_fee ~ treatcomb, weights = ipw, data = subdat),
                   lm(out_contract ~ treatcomb, weights = ipw, data = subdat),
                   lm(out_influence ~ treatcomb, weights = ipw, data = subdat),
                   lm(out_ind ~ treatcomb, weights = ipw, data = subdat))
stargazer(mods_noint, type = "latex", digits = 2,
          order = c("Constant", "treatcomb1"),
          covariate.labels = c("Constant", "Treatments (Combined)"),
          dep.var.caption = "Dependent Variable:",
          dep.var.labels.include = FALSE,
          column.labels = c(out_labs, "Index"),
          model.numbers = FALSE,
          omit.table.layout = "n")

## Table 9: Separate outcomes and treatments
## Don't save to tex file because requires manual editing to fit - just print
## out here and edit
mods_noint_septreat <- list(lm(out_benefit ~ treat, data = subdat),
                            lm(out_interest ~ treat, data = subdat),
                            lm(out_fee ~ treat, data = subdat),
                            lm(out_contract ~ treat, data = subdat),
                            lm(out_influence ~ treat, data = subdat),
                            lm(out_ind ~ treat, data = subdat))
stargazer(mods_noint_septreat, type = "latex", digits = 2,
          order = c("Constant", "treat2", "treat3", "treat4", "treat5", "treat6"),
          covariate.labels = c("Constant", "Horizontal Acct.",
                               "Vertical Acct.",
                               "Black Sheep",
                               "Blk. Shp. + Hor.",
                               "Blk. Shp. + Vert."),
          dep.var.caption = "Dependent Variable:",
          dep.var.labels.include = FALSE,
          column.labels = c(out_labs, "Index"),
          model.numbers = FALSE,
          omit.table.layout = "n")

## Table 10: interaction model with separate treatments
## Don't save to tex file because requires manual editing to fit - just print
## out here and edit
mods_int_septreat <- list(lm(out_benefit ~ treat * musVhindu, data = subdat),
                          lm(out_interest ~ treat * musVhindu, data = subdat),
                          lm(out_fee ~ treat * musVhindu, data = subdat),
                          lm(out_contract ~ treat * musVhindu, data = subdat),
                          lm(out_influence ~ treat * musVhindu, data = subdat),
                          lm(out_ind ~ treat * musVhindu, data = subdat))
stargazer(mods_int_septreat, type = "latex", digits = 2,
          order = paste0("^",
                         c("Constant", "musVhindu", "treat2", "treat2:musVhindu",
                           "treat3", "treat3:musVhindu", "treat4", "treat4:musVhindu",
                           "treat5", "treat5:musVhindu", "treat6", "treat6:musVhindu"),
                         "$"),
          covariate.labels = c("Constant", "Muslim",
                               "Hor. Acct.", "Hor. Acct. x Mus.",
                               "Vert. Acct.", "Vert. Acct. x Mus.",
                               "Black Sheep", "Blk. Shp. x Mus.",
                               "Blk. Shp. + H.A.",
                               "(B.S. + H.A.) x Mus.",
                               "Blk. Shp. + V.A.",
                               "(B.S. + V.A.) x Mus."),
          dep.var.caption = "Dependent Variable:",
          dep.var.labels.include = FALSE,
          column.labels = c(out_labs, "Index"),
          model.numbers = FALSE)

## Figure 3: coefficient plot for interactive model
inteff <- function(mymod) {
    b1s <- paste0("treat", 2:6)
    modcoefs <- coef(mymod)
    modvcov <- vcov(mymod)
    mycoefs <- modcoefs[b1s] + modcoefs[paste0(b1s, ":musVhindu")]
    mySEs <- sapply(b1s, function(beta, vcovmat) {
        sqrt(vcovmat[beta, beta] +
             vcovmat[paste0(beta, ":musVhindu"), paste0(beta, ":musVhindu")] +
             2 * vcovmat[beta, paste0(beta, ":musVhindu")])
    }, vcovmat = modvcov)
    myLow95 <- mycoefs - mySEs * qt(0.975, df.residual(mymod))
    myUpp95 <- mycoefs + mySEs * qt(0.975, df.residual(mymod))
    myLow90 <- mycoefs - mySEs * qt(0.95, df.residual(mymod))
    myUpp90 <- mycoefs + mySEs * qt(0.95, df.residual(mymod))
    return(data.frame(outcome = as.character(mymod$terms[[2]]), rel = "Muslim",
                      treat = 2:6, coef = mycoefs, SE = mySEs, low95 = myLow95,
                      upp95 = myUpp95, low90 = myLow90, upp90 = myUpp90))
}
b1eff <- function(mymod) {
    b1s <- paste0("treat", 2:6)
    mycoefs <- coef(summary(mymod))[b1s, 1]
    mySEs <- coef(summary(mymod))[b1s, 2]
    myLow95 <- mycoefs - mySEs * qt(0.975, df.residual(mymod))
    myUpp95 <- mycoefs + mySEs * qt(0.975, df.residual(mymod))
    myLow90 <- mycoefs - mySEs * qt(0.95, df.residual(mymod))
    myUpp90 <- mycoefs + mySEs * qt(0.95, df.residual(mymod))
    return(data.frame(outcome = as.character(mymod$terms[[2]]), rel = "Hindu",
                      treat = 2:6, coef = mycoefs, SE = mySEs, low95 = myLow95,
                      upp95 = myUpp95, low90 = myLow90, upp90 = myUpp90))
}
modanalysis <- function(outcome, RHS, mydat) {
    myform <- paste0(outcome, " ~ ", RHS)
    mymod <- lm(myform, mydat)
    myout <- rbind(b1eff(mymod), inteff(mymod))
    treats <- c("Horizontal\nAccountability", "Vertical\nAccountability",
                "Black Sheep", "Black Sheep +\nHor. Acct.",
                "Black Sheep +\nVert. Acct.")
    myout$treat <- plyr::mapvalues(myout$treat, 2:6, treats)
    myout$treat <- factor(myout$treat, treats)
    return(myout)
}
mods_mVh <- lapply(out_vars, modanalysis, RHS = "treat * musVhindu",
                     mydat = subdat)
mods_mVh <- rbindlist(mods_mVh)
mods_mVh[, outcome := plyr::mapvalues(outcome, out_vars, out_labs)]
mods_mVh_bytreat <- mods_mVh[, list(coef = mean(coef)),
                             by = list(rel, treat)]
mods_mVh_bytreat[, mean(coef), by = rel] #for presentation take the difference
mods_mVh[, outcome := factor(outcome, out_labs)]
ggplot(mods_mVh, aes(x = treat, y = coef, colour = outcome)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_linerange(aes(ymin = low90, ymax = upp90),
                  position = position_dodge(width = 0.5), size = 0.9) +
    geom_linerange(aes(ymin = low95, ymax = upp95),
                  position = position_dodge(width = 0.5), size = 0.45) +
    geom_point(position = position_dodge(width = 0.5), size = 0.8) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    coord_cartesian(ylim = c(-0.25, 0.65)) +
    facet_wrap(~ rel) +
    labs(x = "Treatment", y = "Effect Estimate with 90% and 95% CIs",
         colour = "Outcomes\n(1-4 Scale)")
ggsave(file.path(figs_dir, "mods_int_septreat.png"), width = 6.5, height = 4)

## Table 11: full models with controls
control_out <- stargazer(mod_int, mod_intwcont_0, mod_intwcont_1, mod_intwcont_2,
                         mod_intwcont_3, mod_intwcont_4, mod_intwcont_5,
                         type = "latex", digits = 2,
                         dep.var.caption = "Dependent Variable: Index of Favorability",
                         dep.var.labels.include = FALSE,
                         order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu",
                                               cont2_vars), "$"),
                         covariate.labels = c("Constant", "Treatments", "Muslim",
                                              "Treatments x Muslim", cont2_labs),
                         omit.table.layout = "n",
                         title = "Main Model with Controls",
                         label = "tab:int-fullcont",
                         omit.stat = c("ser", "f"))
control_out[str_detect(control_out, "begin\\{table")] <- "\\begin{longtable}{lccccccc}"
control_out[str_detect(control_out, "end\\{table")] <- "\\end{longtable}"
control_out <- control_out[-which(str_detect(control_out, "tabular"))]
writeLines(control_out, file.path(tabs_dir, "mods_intwcont_full.tex"))

## Table 12: balance test
## Testing balance with the combined treatments
baltest_comb <- xBalance(as.formula(paste("as.numeric(treatcomb) ~ ", paste0(cont_vars, collapse = " + "))),
                         data = subdat,
                         report = c("chisquare.test", "std.diffs"))
baldat_comb <- data.table(
    var = factor(cont_labs, rev(cont_labs)),
    treat = rep("Treatments (Combined)", length(cont_labs)),
    balstat = baltest_comb$results[1:24, 1, ]
)
baltest_t2 <- xBalance(as.formula(paste("as.logical(treat == 2) ~ ", paste0(cont_vars, collapse = " + "))),
                         data = subdat[treat %in% c(1, 2)],
                         report = c("chisquare.test", "std.diffs"))
baltest_t3 <- xBalance(as.formula(paste("as.logical(treat == 3) ~ ", paste0(cont_vars, collapse = " + "))),
                         data = subdat[treat %in% c(1, 3)],
                         report = c("chisquare.test", "std.diffs"))
baltest_t4 <- xBalance(as.formula(paste("as.logical(treat == 4) ~ ", paste0(cont_vars, collapse = " + "))),
                         data = subdat[treat %in% c(1, 4)],
                         report = c("chisquare.test", "std.diffs"))
baltest_t5 <- xBalance(as.formula(paste("as.logical(treat == 5) ~ ", paste0(cont_vars, collapse = " + "))),
                         data = subdat[treat %in% c(1, 5)],
                         report = c("chisquare.test", "std.diffs"))
baltest_t6 <- xBalance(as.formula(paste("as.logical(treat == 6) ~ ", paste0(cont_vars, collapse = " + "))),
                         data = subdat[treat %in% c(1, 6)],
                         report = c("chisquare.test", "std.diffs"))
baldat_sep <- data.table(
    var = factor(rep(cont_labs, 5), rev(cont_labs)),
    treat = rep(c("Horizontal Accountability", "Vertical Accountability",
                  "Black Sheep",
                  "Horizontal Accountability + Black Sheep",
                  "Vertical Accountability + Black Sheep"),
                each = length(cont_labs)),
    balstat = c(baltest_t2$results[1:24, 1, ],
                baltest_t3$results[1:24, 1, ],
                baltest_t4$results[1:24, 1, ],
                baltest_t5$results[1:24, 1, ],
                baltest_t6$results[1:24, 1, ])
)
tmp_dat <- rbindlist(list(baltest_t2$overall, baltest_t3$overall,
                          baltest_t4$overall, baltest_t5$overall,
                          baltest_t6$overall, baltest_comb$overall))
baltab_overall <- data.table(
    "Treatment" = c("Horizontal Accountability", "Vertical Accountability",
                    "Black Sheep",
                    "Horizontal Accountability + Black Sheep",
                    "Vertical Accountability + Black Sheep",
                    "Treatments Combined"),
    tmp_dat[, 1],
    tmp_dat[, 2],
    tmp_dat[, 3]
)
setnames(baltab_overall, c("Treatment", "Chi-Square", "Degrees of Freedom",
                           "P-value"))
baldat_out <- stargazer(baltab_overall, summary = FALSE, rownames = FALSE,
                        digits = 2,
                        title = "Omnibus Balance Tests", label = "tab:bal-test")
baldat_out <- str_replace_all(baldat_out, "cccc", "lccc")
baldat_out <- str_replace_all(baldat_out, "CHI", "$ \\\\chi^2 $")
writeLines(baldat_out, file.path(tabs_dir, "baldat.tex"))

## Figure 4: balance in figure form
balplot_1 <- ggplot(baldat_sep, aes(x = balstat, y = var, color = treat)) +
    geom_vline(xintercept = 0, linetype = 2, size = 0.5) +
    geom_point() +
    coord_cartesian(xlim = c(-0.15, 0.15)) +
    labs(y = "Covariate", x = "Standardized Difference", colour = "Treatment",
         title = "Treatments Separate") +
    theme(legend.position = "bottom",
          text = element_text(size = 7)) +
    guides(color = guide_legend(nrow = 3, byrow = TRUE))
balplot_2 <- ggplot(baldat_comb, aes(x = balstat, y = var)) +
    geom_vline(xintercept = 0, linetype = 2, size = 0.5) +
    geom_point() +
    coord_cartesian(xlim = c(-0.15, 0.15)) +
    labs(y = "Covariate", x = "Standardized Difference",
         title = "Treatments Combined") +
    theme(text = element_text(size = 7))
balplot_final <- plot_grid(balplot_1, balplot_2, ncol = 1,
                           rel_heights = c(1.2, 1), align = "v", axis = "lr")
ggsave(file.path(figs_dir, "balance.png"), balplot_final, width = 6, height = 8)

## Table 13: manipulation check
mod_chk <- lm(out_ind ~ treatcomb * musVhindu, data = subdat,
              subset = manchk_1 == 3 & manchk_2 == 2, weights = ipw)
stargazer(mod_chk, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Muslim",
                               "Treatments (Combined) x Muslim"),
          dep.var.caption = "Dependent Variable: Index of Favorability toward Drainage Program",
          dep.var.labels.include = FALSE,
          out = file.path(tabs_dir, "mod_chk.tex"))

## Table 14: length of treatment assignment
timer_vars <- grep("timer{0,1}_.*PageSubmit", names(mydat), value = TRUE)
timer_date_vars <- c("date", timer_vars)
timers <- mydat[, ..timer_date_vars]
setnames(timers, gsub("timer{0,1}_([^_]+)_PageSubmit", "\\1", names(timers)))
timers[!is.na(cond1b), cond1a := cond1b]
timers[!is.na(cond2b), cond2a := cond2b]
timers[!is.na(cond3b), cond3a := cond3b]
timers[!is.na(cond4b), cond4a := cond4b]
timers[!is.na(cond5b), cond5a := cond5b]
timers[!is.na(cond6b), cond6a := cond6b]
timers[!is.na(cond7b), cond7a := cond7b]
timers[!is.na(cond8b), cond8a := cond8b]
timers[!is.na(cond9b), cond9a := cond9b]
setnames(timers, paste0("cond", 1:9, "a"), paste0("cond", 1:9))
timers[, paste0("cond", 1:9, "b") := NULL]
timer_vars <- names(timers)[-1]
timers[, overall := rowSums(.SD, na.rm = TRUE), .SDcols = timer_vars]
timers[overall == 0, overall := NA]
## Create timers dataset for our subset of individuals
timers[, id := mydat$ResponseId]
timers[, treat := mydat$treat]
timers[, enum_id := mydat$enum_id]
timers[, treat := plyr::mapvalues(treat, 1:9, c(1, NA, 4, 2, NA, 5, 3, NA, 6))]
timers[, treat := factor(treat, 1:6)]
timers_sub <- timers[!is.na(treat) & !is.na(mydat$settlement), ]
## Get the treatment variables better formatted
timers_sub[, treat_time := rowSums(.SD, na.rm = TRUE),
       .SDcols = paste0("cond", 1:9)]
timers_sub[, paste0("cond", 1:9) := NULL]
## Rename the enumerator variable
setnames(timers_sub, "enum", "enum_items")
## Order dataset
setcolorder(timers_sub,
            c("id", "date", "treat", "enum_id", "consent", "demographics",
              "residence", "religion", "localfigures", "ties", "drainage",
              "expintro", "treat_time", "manchk", "outcomes1", "educemp",
              "finance", "assets", "politics", "finques", "enum_items",
              "overall"))
## How long did each different treatment take?
timers_out <- timers_sub[, mean(treat_time, na.rm = TRUE), by = treat]
setorder(timers_out, treat)
setnames(timers_out, c("Treatment", "Average Time for Treatment (Seconds)"))
timers_out[, Treatment := plyr::mapvalues(Treatment, 1:6,
                                      c("Control", "Horizontal Accountability",
                                        "Vertical Accountability",
                                        "Black Sheep",
                                        "Hor. Acct. + Black Sheep",
                                        "Vert. Acct. + Black Sheep"))]
## Write out
stargazer(timers_out, type = "latex", summary = FALSE, rownames = FALSE, digits = 2,
          title = "Length of Treatment Assignment", label = "tab:treat-time",
          out = file.path(tabs_dir, "treat_time.tex"))

## Table 15: all treatments
mymod <- lm(out_ind ~ treatlabs, data = newfull)
stargazer(mymod, type = "latex", digits = 2, style = "apsr",
          title = "Effect of All Treatments",
          label = "tab:modbasic-all",
          dep.var.labels = c("Index Outcome"),
          order = c(9, 1:8),
          covariate.labels = c("Constant", levels(newfull$treatlabs)[-1]),
          out = file.path(tabs_dir, "modbasic_all.tex"))

## Table 16: tobit model
mod_tobit <- tobit(out_ind ~ treatcomb * musVhindu, left = 1, right = 4,
                  data = subdat, weights = ipw)
stargazer(mod_tobit, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Muslim",
                               "Treatments (Combined) x Muslim"),
          dep.var.caption = "Dependent Variable: Index of Favorability toward Drainage Program",
          dep.var.labels.include = FALSE,
          out = file.path(tabs_dir, "mod_tobit.tex"))

## Figure 5: histogram
ggplot(subdat, aes(x = out_ind)) +
    geom_histogram(bins = 16) +
    labs(x = "Favorability toward Drainage Program (Index)",
         y = "Count")
ggsave(file.path(figs_dir, "index_hist.png"), width = 5.5, height = 4)

## Table 17: by enumerator religion
mod_int_enumMuslim <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                         data = subdat[enum_rel == "Muslim"])
mod_int_enumHindu <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                        data = subdat[enum_rel == "Hindu"])
stargazer(mod_int_enumHindu, mod_int_enumMuslim, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Muslim",
                               "Treatments (Combined) x Muslim"),
          dep.var.caption = "Dependent Variable: Index of Favorability toward Drainage Program",
          dep.var.labels.include = FALSE,
          column.labels = c("Hindu Enumerators", "Muslim Enumerators"),
          model.numbers = FALSE,
          out = file.path(tabs_dir, "mod_int_enumeff.tex"))

## In text - Basti as neighborhood
table(subdat$neighborhood, useNA = "ifany")
2593 / nrow(subdat)

## Table 18: diversity effects
subdat[, divmod := div50m]
mod_div1 <- lm(out_ind ~ treatcomb * divmod, weights = ipw, data = subdat)
subdat[, divmod := div100m]
mod_div2 <- lm(out_ind ~ treatcomb * divmod, weights = ipw, data = subdat)
subdat[, divmod := div150m]
mod_div3 <- lm(out_ind ~ treatcomb * divmod, weights = ipw, data = subdat)
stargazer(mod_div1, mod_div2, mod_div3, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "divmod", "treatcomb1:divmod"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Diversity",
                               "Treatments (Combined) x Diversity"),
          dep.var.labels = "Dependent Variable: Index of Favorability",
          column.labels = c("50 Meter Radius", "100 Meter Radius", "150 Meter Radius"),
          title = "Regression of Favorability toward Drainage Program, By Different Diversity Measures",
          label = "tab:outs-divmods",
          out = file.path(tabs_dir, "mod_divs.tex"))

## Figure 6: interaction with diversity
myvars <- c("out_ind", "treatcomb", "musVhindu", "div100m", "ipw")
tmpdat <- na.omit(subdat[, ..myvars])
mod_div <- interflex(data = tmpdat, Y = "out_ind", D = "treatcomb", X = "div100m",
                     estimator = "binning", weights = "ipw",
                     Ylabel = "Marginal Effect of Treatment on\nIndex of Favorability toward Drainage Program",
                     Xlabel = "Diversity (within 100m radius)",
                     cex.lab = 0.7, cex.axis = 0.7)
plot(mod_div)
ggsave(file.path(figs_dir, "mod_div.png"), width = 5, height = 4)

## Table 19: by % Muslim
tmpdat <- subdat[, list(perc_muslim = mean(musVhindu, na.rm = TRUE),
                        N = .N),
                 by = settlement]
setorder(tmpdat, perc_muslim)
settlements_low <- tmpdat[perc_muslim < 0.1]$settlement
settlements_med <- tmpdat[perc_muslim > 0.1 & perc_muslim < 0.5]$settlement
settlements_high <- tmpdat[perc_muslim > 0.5]$settlement
settlements_veryhigh <- tmpdat[perc_muslim > 0.8]$settlement
subdat[, settlement_type := NA_character_]
subdat[settlement %in% settlements_low, settlement_type := "Low (<10%)"]
subdat[settlement %in% settlements_med, settlement_type := "Medium (>10%, <50%)"]
subdat[settlement %in% settlements_high, settlement_type := "High (>50%)"]
subdat[, settlement_type := factor(settlement_type,
                                   levels = c("Low (<10%)", "Medium (>10%, <50%)",
                                              "High (>50%)"))]
mods_settletype <- lapply(levels(subdat$settlement_type),
                          function(mysub, myform, mydat, mywt) {
    lm(as.formula(myform), mydat, subset = settlement_type == mysub, weights = mywt)
}, myform = "out_ind ~ treatcomb * musVhindu", mydat = subdat, mywt = subdat$ipw)
tmpdat <- subdat[, .N, by = list(settlement_type, musVhindu)]
setorder(tmpdat, musVhindu, settlement_type)
## Adding another model
add_mod <- lm(out_ind ~ treatcomb * musVhindu, subdat, settlement %in% settlements_veryhigh,
              weights = ipw)
stargazer(mods_settletype[c(1, 3)], add_mod, type = "latex", digits = 2,
          order = paste0("^", c("Constant", "treatcomb1", "musVhindu", "treatcomb1:musVhindu"),
                         "$"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Muslim",
                               "Treatments (Combined) x Muslim"),
          dep.var.caption = "Dependent Variable: Index of Favorability toward Drainage Program",
          dep.var.labels.include = FALSE,
          title = "Main Model by Percent Muslim in Select Neighborhoods",
          label = "tab:mod_musneighborhood",
          column.labels = c("Low (<10\\%)", "High (>50\\%)", "Very High (>80\\%)"),
          model.numbers = FALSE,
          add.lines = list(c("\\# Hindu Respondents",
                             c(tmpdat$N[1:2], nrow(subdat[musVhindu == 0 & settlement %in% settlements_veryhigh]))),
                           c("\\# Muslim Respondents",
                             c(tmpdat$N[4:5], nrow(subdat[musVhindu == 1 & settlement %in% settlements_veryhigh])))),
          font.size = "small",
          out = file.path(tabs_dir, "mods_settletype.tex"))

## Table 20: levels of trust
poleff_vars <- grep("poleff_", names(mydat), value = TRUE)[1:6]
poleff_means <- subdat[, lapply(.SD, mean, na.rm = TRUE), by = musVhindu,
                       .SDcols = poleff_vars]
poleff_sds <- subdat[, lapply(.SD, sd, na.rm = TRUE), by = musVhindu,
                     .SDcols = poleff_vars]
poleff_tab <- data.table(
    " " = c("Prime Minister/Central Government", "Chief Minister/Delhi Government",
           "The MCD", "Political Parties", "The Police", "Your Local Pradhan"),
    "Mean" = t(poleff_means)[2:7, 1],
    "SD" = t(poleff_sds)[2:7, 1],
    "Mean" = t(poleff_means)[2:7, 2],
    "SD" = t(poleff_sds)[2:7, 2]
)
stargazer(poleff_tab, type = "latex", summary = FALSE, digits = 2, rownames = FALSE,
          out = file.path(tabs_dir, "poleff_byrel.tex"))

## Table 21
with(subdat, table(musVhindu, pradhan_influence_orig))

## Table 22
mod1 <- lm(out_ind ~ treatcomb * caste_levels2, subdat, musVhindu == 0, ipw)
mod2 <- lm(out_ind ~ treatcomb * caste_levels2, subdat, musVhindu == 1, ipw)
stargazer(mod1, mod2, type = "latex", digits = 2, style = "apsr",
          title = "Regression of Favorability Index on Treatment, by Religion and Caste",
          label = "tab:reg-rel-caste",
          dep.var.labels = "Dependent Variable: Index of Favorability",
          column.labels = c("Hindu", "Muslim"),
          order = c("Constant", "^treatcomb1$", "^caste_levels2",
                    "treatcomb1:caste_levels2"),
          covariate.labels = c("Constant", "Treatments (Combined)", "Caste",
                               "Treatments (Combined) x Caste"),
          out = file.path(tabs_dir, "reg_caste_rel.tex"))

## Figure 7
modsept_int_caste_low <- lm(out_ind ~ treat * musVhindu,
                         data = subdat, subset = caste_levels2 == 0)
modsept_int_caste_hi <- lm(out_ind ~ treat * musVhindu,
                         data = subdat, subset = caste_levels2 == 1)
mymods <- list(modsept_int_caste_low, modsept_int_caste_hi)
modsept_int_caste <- lapply(mymods, function(mymod) {
    myout <- cbind(coef(mymod)[8:12],
                   confint(mymod)[8:12, ],
                   confint(mymod, level = 0.9)[8:12, ])
    return(as.data.frame(myout))
})
modsept_int_caste <- rbindlist(modsept_int_caste)
setnames(modsept_int_caste, c("fit", "lwr95", "upr95", "lwr90", "upr90"))
modsept_int_caste[, rel_quant := rep(c("Low", "High"), each = 5)]
modsept_int_caste[, rel_quant := factor(rel_quant, levels = c("Low", "High"))]
modsept_int_caste[, treat := rep(c("Horizontal\nAccountability", "Vertical\nAccountability",
                                 "Black Sheep", "Hor. Acct. x\nBlack Sheep", "Vert. Acct. x\nBlack Sheep"),
                               2)]
modsept_int_caste[, treat := factor(treat,
                                  levels = c("Horizontal\nAccountability", "Vertical\nAccountability",
                                             "Black Sheep", "Hor. Acct. x\nBlack Sheep", "Vert. Acct. x\nBlack Sheep"))]
p01 <- ggplot(modsept_int_caste, aes(x = rel_quant, y = fit, group = treat)) +
    geom_linerange(aes(ymin = lwr90, ymax = upr90), size = 1.25,
                   position = position_dodge(width = 0.4)) +
    geom_linerange(aes(ymin = lwr95, ymax = upr95), size = 0.6,
                   position = position_dodge(width = 0.4)) +
    geom_point(size = 2, position = position_dodge(width = 0.4)) +
    geom_line(size = 0.6, position = position_dodge(width = 0.4)) +
    labs(x = "Caste (Low vs. High)",
         y = "Interaction Estimate for Religion,\nCombined Model, 90/95% CIs") +
    facet_wrap(~ treat, nrow = 1)
ggsave(file.path(figs_dir, "mods_int_bycaste_bytreat.png"), p01,
       width = 6, height = 4.3)

## Figure 8
modsept_int_relsocties_low <- lm(out_ind ~ treat * musVhindu,
                         data = subdat, subset = relind_socties <= mymedians[3])
modsept_int_relsocties_hi <- lm(out_ind ~ treat * musVhindu,
                         data = subdat, subset = relind_socties > mymedians[3])
mymods <- list(modsept_int_relsocties_low, modsept_int_relsocties_hi)
modsept_int_rel <- lapply(mymods, function(mymod) {
    myout <- cbind(coef(mymod)[8:12],
                   confint(mymod)[8:12, ],
                   confint(mymod, level = 0.9)[8:12, ])
    return(as.data.frame(myout))
})
modsept_int_rel <- rbindlist(modsept_int_rel)
setnames(modsept_int_rel, c("fit", "lwr95", "upr95", "lwr90", "upr90"))
modsept_int_rel[, rel_quant := rep(c("Low", "High"), each = 5)]
modsept_int_rel[, rel_quant := factor(rel_quant, levels = c("Low", "High"))]
modsept_int_rel[, treat := rep(c("Horizontal\nAccountability", "Vertical\nAccountability",
                                 "Black Sheep", "Hor. Acct. x\nBlack Sheep", "Vert. Acct. x\nBlack Sheep"),
                               2)]
modsept_int_rel[, treat := factor(treat,
                                  levels = c("Horizontal\nAccountability", "Vertical\nAccountability",
                                             "Black Sheep", "Hor. Acct. x\nBlack Sheep", "Vert. Acct. x\nBlack Sheep"))]
p01 <- ggplot(modsept_int_rel, aes(x = rel_quant, y = fit, group = treat)) +
    geom_linerange(aes(ymin = lwr90, ymax = upr90), size = 1.25,
                   position = position_dodge(width = 0.4)) +
    geom_linerange(aes(ymin = lwr95, ymax = upr95), size = 0.6,
                   position = position_dodge(width = 0.4)) +
    geom_point(size = 2, position = position_dodge(width = 0.4)) +
    geom_line(size = 0.6, position = position_dodge(width = 0.4)) +
    labs(x = "Level of Religious Social Ties",
         y = "Interaction Estimate for Religion,\nCombined Model, 90/95% CIs") +
    facet_wrap(~ treat, nrow = 1)
ggsave(file.path(figs_dir, "mods_int_byrelind3_bytreat.png"), p01,
       width = 6, height = 4.3)

## Table 23
subdat[, relind_socties_d := NA_integer_]
subdat[relind_socties > mymedians[3], relind_socties_d := 1]
subdat[relind_socties <= mymedians[3], relind_socties_d := 0]
tmp1 <- na.omit(subdat[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu)])
tmp2 <- subdat[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu, relind_socties_d)] %>%
    na.omit() %>%
    setorder(musVhindu, relind_socties_d)
tmp3 <- na.omit(subdat[treatcomb == 1, mean(out_ind, na.rm = TRUE), by = list(musVhindu)])
tmp4 <- subdat[treatcomb == 1, mean(out_ind, na.rm = TRUE), by = list(musVhindu, relind_socties_d)] %>%
    na.omit() %>%
    setorder(musVhindu, relind_socties_d)
tmp5 <- subdat[relind_socties == 3 & musVhindu == 1, mean(out_ind, na.rm = TRUE),
               by = list(treatcomb)] %>%
    na.omit() %>%
    setorder(treatcomb)
ptab2 <- data.table(
    " " = c("Hindu", "", "", "Muslim", "", "", ""),
    "  " = c(rep(c("Total", "Low", "High"), 2), "Very High"),
    Control = c(tmp1$V1[1], tmp2$V1[1:2], tmp1$V1[2], tmp2$V1[3:4], tmp5$V1[1]),
    Treatment = c(tmp3$V1[1], tmp4$V1[1:2], tmp3$V1[2], tmp4$V1[3:4], tmp5$V1[2])
)
ptab2[, Difference := Treatment - Control]
ptab2_out <- stargazer(ptab2, type = "latex", digits = 2, summary = FALSE,
                       rownames = FALSE,
                       title = "Difference in Means by Religion and Religious Ties",
                       label = "tab:p-relties",
                       notes = c("Low is below median values (2 or less); High is above median values (2.5 or higher).",
                                 "Very High indicates those who chose the highest value of the religious ties scale (3)."))
my_row <- which(str_detect(ptab2_out, "Muslim"))
ptab2_out <- c(ptab2_out[1:(my_row - 1)], "\\cline{2-5}",
               ptab2_out[my_row:length(ptab2_out)])
ptab2_out <- str_replace_all(ptab2_out, "\\.00[0-9]+", "\\.00")
ptab2_out <- str_replace_all(ptab2_out, "ccccc", "clccc")
ptab2_out <- str_replace_all(ptab2_out, "Low", "\\\\hspace{3mm}Low")
ptab2_out <- str_replace_all(ptab2_out, "& High", "& \\\\hspace{3mm}High")
ptab2_out <- str_replace_all(ptab2_out, "Very High", "\\\\hspace{3mm}Very High")
writeLines(ptab2_out, file.path(tabs_dir, "p_relties.tex"))

## Table 24
tmp1 <- subdat[treatcomb == 0 & musVhindu == 1 & caste_levels2 == 1,
               mean(out_ind, na.rm = TRUE),
               by = list(relind_socties_d)] %>%
    na.omit() %>%
    setorder(relind_socties_d)
tmp2 <- subdat[treatcomb == 0 & musVhindu == 1 & caste_levels2 == 1 & relind_socties == 3,
               mean(out_ind, na.rm = TRUE)]
tmp3 <- subdat[treatcomb == 1 & musVhindu == 1 & caste_levels2 == 1,
               mean(out_ind, na.rm = TRUE),
               by = list(relind_socties_d)] %>%
    na.omit() %>%
    setorder(relind_socties_d)
tmp4 <- subdat[treatcomb == 1 & musVhindu == 1 & caste_levels2 == 1 & relind_socties == 3,
               mean(out_ind, na.rm = TRUE)]
tmp5 <- subdat[musVhindu == 1, mean(out_ind, na.rm = TRUE), list(treatcomb)] %>%
    na.omit() %>%
    setorder(treatcomb)
ptab3 <- data.table(
    " " = c("Muslims Overall", "Low", "High", "Very High"),
    Control = c(tmp5$V1[1], tmp1$V1, tmp2),
    Treatment = c(tmp5$V1[2], tmp3$V1, tmp4)
)
ptab3[, Difference := Treatment - Control]
ptab3_out <- stargazer(ptab3, type = "latex", digits = 2, summary = FALSE,
                       rownames = FALSE,
                       title = "Difference in Means for Upper-Caste Muslims, by Religious Ties",
                       label = "tab:p-relties-upper",
                       notes = c("Low is below median values (2 or less); High is above median values (2.5 or higher).",
                                 "Very High indicates those who chose the highest value of the religious ties scale (3)."))
ptab3_out <- str_replace_all(ptab3_out, "cccc", "lccc")
ptab3_out <- str_replace_all(ptab3_out, "Low", "\\\\hspace{3mm}Low")
ptab3_out <- str_replace_all(ptab3_out, "^High", "\\\\hspace{3mm}High")
ptab3_out <- str_replace_all(ptab3_out, "Very High", "\\\\hspace{3mm}Very High")
writeLines(ptab3_out, file.path(tabs_dir, "p_relties_upper.tex"))

## Table 25
tmp1 <- na.omit(subdat[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu)])
tmp2 <- subdat[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu, caste_levels2)] %>%
    na.omit() %>%
    setorder(musVhindu, caste_levels2)
tmp3 <- na.omit(subdat[treatcomb == 1, mean(out_ind, na.rm = TRUE), by = list(musVhindu)])
tmp4 <- subdat[treatcomb == 1, mean(out_ind, na.rm = TRUE), by = list(musVhindu, caste_levels2)] %>%
    na.omit() %>%
    setorder(musVhindu, caste_levels2)
ptab1 <- data.table(
    " " = c("Hindu", "", "", "Muslim", "", ""),
    "  " = rep(c("Total", "Lower Caste", "Upper Caste"), 2),
    Control = c(tmp1$V1[1], tmp2$V1[1:2], tmp1$V1[2], tmp2$V1[3:4]),
    Treatment = c(tmp3$V1[1], tmp4$V1[1:2], tmp3$V1[2], tmp4$V1[3:4])
)
ptab1[, Difference := Treatment - Control]
ptab1_out <- stargazer(ptab1, type = "latex", digits = 2, summary = FALSE,
                       rownames = FALSE,
                       title = "Difference in Means by Religion and Caste",
                       label = "tab:p-caste-1")
my_row <- which(str_detect(ptab1_out, "Muslim"))
ptab1_out <- c(ptab1_out[1:(my_row - 1)], "\\cline{2-5}",
               ptab1_out[my_row:length(ptab1_out)])
ptab1_out <- str_replace_all(ptab1_out, "\\.00[0-9]", "\\.00")
ptab1_out <- str_replace_all(ptab1_out, "ccccc", "clccc")
ptab1_out <- str_replace_all(ptab1_out, "Lower", "\\\\hspace{3mm}Lower")
ptab1_out <- str_replace_all(ptab1_out, "Upper", "\\\\hspace{3mm}Upper")
writeLines(ptab1_out, file.path(tabs_dir, "p_caste_1.tex"))

## Figure 9
myvars <- c("relind_private", "relind_public", "relind_socties")
mymedians <- apply(subdat[musVhindu == 1, ..myvars], 2, median, na.rm = TRUE)
## The median for prayer provides no variation, use the value just below the
## median
mymedians[1] <- 4
with(subdat, table(musVhindu, relind_private <= mymedians[1], useNA = "ifany"))
with(subdat, table(musVhindu, relind_public <= mymedians[2], useNA = "ifany"))
with(subdat, table(musVhindu, relind_socties <= mymedians[3], useNA = "ifany"))
## Now estimate models using these as the dividing factor
mod_int_relpriv_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                          data = subdat, subset = relind_private <= mymedians[1])
mod_int_relpriv_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                         data = subdat, subset = relind_private > mymedians[1])
mod_int_relpub_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                         data = subdat, subset = relind_public <= mymedians[2])
mod_int_relpub_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                        data = subdat, subset = relind_public > mymedians[2])
mod_int_relsocties_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                             data = subdat, subset = relind_socties <= mymedians[3])
mod_int_relsocties_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                            data = subdat, subset = relind_socties > mymedians[3])
mod_caste_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                    data = subdat, subset = caste_levels2 == 0)
mod_caste_high <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                     data = subdat, subset = caste_levels2 == 1)
relmods <- list(mod_int_relpriv_low, mod_int_relpriv_hi,
                mod_int_relpub_low, mod_int_relpub_hi,
                mod_int_relsocties_low, mod_int_relsocties_hi,
                mod_caste_low, mod_caste_high)
## Creating dataset for plot
mods_int_rel <- lapply(relmods, function(mymod) {
    myout <- rbind(
        c(coef(mymod)[2], confint(mymod)[2, ], confint(mymod, level = 0.9)[2, ]),
        c(coef(mymod)[4], confint(mymod)[4, ], confint(mymod, level = 0.9)[4, ])
    )
    return(as.data.frame(myout))
})
mods_int_rel <- rbindlist(mods_int_rel)
setnames(mods_int_rel, c("fit", "lwr95", "upr95", "lwr90", "upr90"))
mods_int_rel[, musVhindu := rep(c("Hindu", "Muslim"), 8)]
mods_int_rel[, rel_quant := rep(c("Low", "Low", "High", "High"), 4)]
mods_int_rel[, rel_quant := factor(rel_quant, levels = c("Low", "High"))]
rel_factors_labs <- c("Private Practice\n(Prayer)", "Public Practice",
                      "Religious Ties", "Caste")
mods_int_rel[, rel_factor := rep(rel_factors_labs, each = 4)]
mods_int_rel[, rel_factor := factor(rel_factor, levels = rev(rel_factors_labs))]
table(subdat$relact_donations, useNA = "ifany")
median(subdat$relact_donations, na.rm = TRUE)
mod_int_donations_low <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                            data = subdat, subset = relact_donations < 5)
mod_int_donations_hi <- lm(out_ind ~ treatcomb * musVhindu, weights = ipw,
                           data = subdat, subset = relact_donations >= 5)
donmods <- list(mod_int_donations_low, mod_int_donations_hi)
mods_int_don <- lapply(donmods, function(mymod) {
    myout <- rbind(
        c(coef(mymod)[2], confint(mymod)[2, ], confint(mymod, level = 0.9)[2, ]),
        c(coef(mymod)[4], confint(mymod)[4, ], confint(mymod, level = 0.9)[4, ])
    )
    return(as.data.frame(myout))
})
mods_int_don <- rbindlist(mods_int_don)
setnames(mods_int_don, c("fit", "lwr95", "upr95", "lwr90", "upr90"))
mods_int_don[, musVhindu := rep(c("Hindu", "Muslim"), 2)]
mods_int_don[, rel_quant := c("Low", "Low", "High", "High")]
mods_int_don[, rel_quant := factor(rel_quant, levels = c("Low", "High"))]
rel_factors_labs <- c("Religious Donations")
mods_int_don[, rel_factor := rel_factors_labs]
mods_int_don[, rel_factor := factor(rel_factor, levels = rev(rel_factors_labs))]
mods_int_rel[, rel_factor := as.character(rel_factor)]
mods_int_don[, rel_factor := as.character(rel_factor)]
rel_factors_labs <- c("Private Practice\n(Prayer)", "Public Practice",
                      "Religious Ties", "Religious Donations", "Caste")
rbindlist(list(mods_int_rel, mods_int_don)) %>%
    .[, rel_factor := factor(rel_factor, levels = rel_factors_labs)] %>%
    .[musVhindu == "Muslim" &
      rel_factor %in% c("Private Practice\n(Prayer)", "Public Practice",
                        "Religious Donations")] %>%
    ggplot(aes(x = rel_quant, y = fit, group = musVhindu)) +
    geom_linerange(aes(ymin = lwr90, ymax = upr90), size = 1.25,
                   position = position_dodge(width = 0.4)) +
    geom_linerange(aes(ymin = lwr95, ymax = upr95), size = 0.6,
                   position = position_dodge(width = 0.4)) +
    geom_point(size = 2, position = position_dodge(width = 0.4)) +
    geom_line(size = 0.6, position = position_dodge(width = 0.4)) +
    labs(x = "Level of Moderating Factor",
         y = "Interaction Estimate for Religion,\nCombined Model, 90/95% CIs") +
    facet_wrap(~ rel_factor, nrow = 1)
ggsave(file.path(figs_dir, "mods_int_byall.png"), height = 3.25, width = 6)

## Table 26
tmp1 <- subdat %>%
    .[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu, assets_3level)] %>%
    .[, musVhindu := dplyr::recode(musVhindu, `0` = "Hindu", `1` = "Muslim")] %>%
    .[, assets_3level := dplyr::recode(assets_3level, `1` = "Low", `2` = "Middle", `3` = "High")] %>%
    pivot_wider(values_from = "V1", names_from = "musVhindu") %>%
    as.data.table() %>%
    .[, assets_3level := factor(assets_3level, c("Low", "Middle", "High"))] %>%
    setorder(assets_3level) %>%
    .[, Difference := Muslim - Hindu] %>%
    setnames(c("", "Hindu", "Muslim", "Difference"))
base_2 <- stargazer(tmp1, type = "latex", summary = FALSE, rownames = FALSE,
                    digits = 2,
                    title = "Outcome Index in Control Group, by Religion and Asset Class",
                    label = "tab:control-rel-assets")
base_2 <- str_replace_all(base_2, "cccc", "lccc")
writeLines(base_2, file.path(tabs_dir, "p_base_2.tex"))

## Table 27
tmp1 <- subdat %>%
    .[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu, diff_loan)] %>%
    na.omit() %>%
    setnames("diff_loan", "vars") %>%
    setorder(musVhindu, vars) %>%
    .[, vars := dplyr::recode(vars, `0` = "Didn't take loan", `1` = "Took out loan")]
tmp2 <- subdat %>%
    .[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu, diff_sell)] %>%
    na.omit() %>%
    setnames("diff_sell", "vars") %>%
    setorder(musVhindu, vars) %>%
    .[, vars := dplyr::recode(vars, `0` = "Didn't sell valuables", `1` = "Sold valuables")]
tmp3 <- subdat %>%
    .[treatcomb == 0, mean(out_ind, na.rm = TRUE), by = list(musVhindu, savings)] %>%
    na.omit() %>%
    setnames("savings", "vars") %>%
    setorder(musVhindu, vars) %>%
    .[, vars := dplyr::recode(vars, `0` = "Saved money", `0.5` = "Just got by",
                              `1` = "Spent savings")]
tmp_all <- rbindlist(list(tmp1, tmp2, tmp3)) %>%
    .[, musVhindu := dplyr::recode(musVhindu, `0` = "Hindu", `1` = "Muslim")] %>%
    pivot_wider(values_from = "V1", names_from = "musVhindu") %>%
    as.data.table() %>%
    .[, Difference := Muslim - Hindu] %>%
    setnames("vars", " ")
stargazer(tmp_all, type = "latex", summary = FALSE, rownames = FALSE, digits = 2,
          title = "Outcome Index in Control Group, by Religion and Financial Questions",
          label = "tab:control-finhard",
          out = file.path(tabs_dir, "p_base_3.tex"))

## Table 28
tmp1 <- subdat %>%
    .[, mean(relind_private, na.rm = TRUE), by = list(musVhindu, caste_levels2)] %>%
    na.omit() %>%
    setorder(musVhindu, caste_levels2)
tmp2 <- subdat %>%
    .[, mean(relind_public, na.rm = TRUE), by = list(musVhindu, caste_levels2)] %>%
    na.omit() %>%
    setorder(musVhindu, caste_levels2)
tmp3 <- subdat %>%
    .[, mean(relind_socties, na.rm = TRUE), by = list(musVhindu, caste_levels2)] %>%
    na.omit() %>%
    setorder(musVhindu, caste_levels2)
tmp1to3 <- rbindlist(list(tmp1, tmp2, tmp3)) %>%
    .[, var := rep(c("Private Practice", "Public Practice", "Religious Ties"),
                     each = 4)] %>%
    .[, musVhindu := dplyr::recode(musVhindu, `0` = "Hindu", `1` = "Muslim")] %>%
    .[, caste_levels2 := dplyr::recode(caste_levels2, `0` = "Lower Caste", `1` = "Upper Caste")] %>%
    pivot_wider(values_from = "V1", names_from = "musVhindu") %>%
    setcolorder(c("var", "caste_levels2", "Hindu", "Muslim"))
tmp4 <- subdat %>%
    .[, list(mean(relind_private, na.rm = TRUE),
             mean(relind_public, na.rm = TRUE),
             mean(relind_socties, na.rm = TRUE)),
      by = list(musVhindu)] %>%
    na.omit() %>%
    setorder(musVhindu) %>%
    .[, musVhindu := NULL] %>%
    transpose() %>%
    setnames(c("Hindu", "Muslim")) %>%
    .[, `:=`("var" = c("Private Practice", "Public Practice", "Religious Ties"),
             "caste_levels2" = "Total")] %>%
    setcolorder(c("var", "caste_levels2", "Hindu", "Muslim"))
ptab5 <- rbindlist(list(tmp1to3, tmp4)) %>%
    .[, var := factor(var, levels = c("Private Practice", "Public Practice", "Religious Ties")) ] %>%
    .[, caste_levels2 := factor(caste_levels2, levels = c("Lower Caste", "Upper Caste", "Total"))] %>%
    setorder(var, caste_levels2) %>%
    .[, var := c("Private Practice", "", "", "Public Practice", "", "",
                 "Religious Ties", "", "")] %>%
    setnames(c("var", "caste_levels2"), c(" ", "  "))
ptab5_out <- stargazer(ptab5, type = "latex", summary = FALSE, digits = 2,
                       rownames = FALSE,
                       title = "Averages for Religious Indices, by Caste and Religion",
                       label = "tab:p-relind-casterel")
my_rows <- c(which(str_detect(ptab5_out, "Public Practice")), which(str_detect(ptab5_out, "Religious Ties")))
ptab5_out <- c(ptab5_out[1:(my_rows[1] - 1)], "\\cline{2-4}",
               ptab5_out[my_rows[1]:(my_rows[2] - 1)], "\\cline{2-4}",
               ptab5_out[my_rows[2]:length(ptab5_out)])
writeLines(ptab5_out, file.path(tabs_dir, "p_relind_casterel.tex"))

## Table 29
tmp1 <- subdat[, list(Low = sum(assets_3level == 1) / .N,
              Middle = sum(assets_3level == 2) / .N,
              High = sum(assets_3level == 3) / .N),
       by = list(musVhindu, caste_levels2)] %>%
    na.omit() %>%
    setorder(musVhindu, caste_levels2)
tmp2 <- subdat[, list(Low = sum(assets_3level == 1) / .N,
                      Middle = sum(assets_3level == 2) / .N,
                      High = sum(assets_3level == 3) / .N)] %>%
    na.omit()
ptab4 <- data.table(
    " " = c("Hindu", "", "Muslim", "", ""),
    "  " = c(rep(c("Lower Caste", "Upper Caste"), 2), "Overall"),
    Low = label_percent(accuracy = 1)(c(tmp1$Low, tmp2$Low)),
    Middle = label_percent(accuracy = 1)(c(tmp1$Middle, tmp2$Middle)),
    High = label_percent(accuracy = 1)(c(tmp1$High, tmp2$High))
)
ptab4_out <- stargazer(ptab4, type = "latex", summary = FALSE,
                       rownames = FALSE,
                       title = "Proportions in Asset Classes, by Religion and Caste",
                       label = "tab:p-assets-perc",
                       notes = c("Middle = owns at least one of a bike, a cooler, or a fridge.",
                                 "High = owns at least one of a two/three/four wheel vehicle, a computer, or an AC"))
my_rows <- c(which(str_detect(ptab4_out, "Muslim")), which(str_detect(ptab4_out, "Overall")))
ptab4_out <- c(ptab4_out[1:(my_rows[1] - 1)], "\\cline{2-5}",
               ptab4_out[my_rows[1]:(my_rows[2] - 1)], "\\cline{2-5}",
               ptab4_out[my_rows[2]:length(ptab4_out)])
ptab4_out <- str_replace_all(ptab4_out, "ccccc", "clccc")
writeLines(ptab4_out, file.path(tabs_dir, "p_assets_perc.tex"))
